diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 42497c6be9567..91eae7ca3c442 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,3 +1,4 @@ +<<<<<<< HEAD # Contributing ## License @@ -92,3 +93,15 @@ Merge of pull request is done only by project maintainers. There are three optio Squashing is done to shorten history and make sure that the project is buildable on any commit. - [Create a merge commit] Used for pull down PRs to avoid duplication of LLVM commits. +======= +# Contributing to LLVM + +Thank you for your interest in contributing to LLVM! There are many ways to +contribute, and we appreciate all contributions. + +To get started with contributing, please take a look at the +[Contributing to LLVM](https://llvm.org/docs/Contributing.html) guide. It +describes how to get involved, raise issues and submit patches. Please note +that at the moment the LLVM project does not use either Github pull requests +or Github issues. +>>>>>>> effcdc3a82f2a32829170e7f7a2ff3d7853b612d diff --git a/README.md b/README.md index e09fe289dd8af..0e4e25176b9a9 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,18 @@ ## Introduction +<<<<<<< HEAD Intel staging area for llvm.org contribution. Home for Intel LLVM-based projects: - SYCL* Compiler and Runtimes - compiler and runtime libraries for SYCL ([https://www.khronos.org/sycl/](https://www.khronos.org/sycl/)). See **sycl** branch. +======= +The README briefly describes how to get started with building LLVM. +For more information on how to contribute to the LLVM project, please +take a look at the +[Contributing to LLVM](https://llvm.org/docs/Contributing.html) guide. + +## Getting Started with the LLVM System +>>>>>>> effcdc3a82f2a32829170e7f7a2ff3d7853b612d ## License See [LICENSE.txt](sycl/LICENSE.TXT) for details. diff --git a/clang-tools-extra/clang-include-fixer/tool/clang-include-fixer.py b/clang-tools-extra/clang-include-fixer/tool/clang-include-fixer.py index df05101e4fd8c..e3a52f094f663 100644 --- a/clang-tools-extra/clang-include-fixer/tool/clang-include-fixer.py +++ b/clang-tools-extra/clang-include-fixer/tool/clang-include-fixer.py @@ -17,6 +17,7 @@ # It operates on the current, potentially unsaved buffer and does not create # or save any files. To revert a fix, just undo. +from __future__ import print_function import argparse import difflib import json @@ -79,7 +80,7 @@ def GetUserSelection(message, headers, maximum_suggested_headers): except Exception: # Show a new prompt on invalid option instead of aborting so that users # don't need to wait for another clang-include-fixer run. - print >> sys.stderr, "Invalid option:", res + print("Invalid option: {}".format(res), file=sys.stderr) return GetUserSelection(message, headers, maximum_suggested_headers) return headers[idx - 1] @@ -95,7 +96,7 @@ def execute(command, text): p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, startupinfo=startupinfo) - return p.communicate(input=text) + return p.communicate(input=text.encode('utf-8')) def InsertHeaderToVimBuffer(header, text): @@ -159,7 +160,7 @@ def main(): if query_mode: symbol = get_symbol_under_cursor() if len(symbol) == 0: - print "Skip querying empty symbol." 
+ print("Skip querying empty symbol.") return command = [binary, "-stdin", "-query-symbol="+get_symbol_under_cursor(), "-db=" + args.db, "-input=" + args.input, @@ -170,13 +171,14 @@ def main(): "-input=" + args.input, vim.current.buffer.name] stdout, stderr = execute(command, text) if stderr: - print >> sys.stderr, "Error while running clang-include-fixer: " + stderr + print("Error while running clang-include-fixer: {}".format(stderr), + file=sys.stderr) return include_fixer_context = json.loads(stdout) query_symbol_infos = include_fixer_context["QuerySymbolInfos"] if not query_symbol_infos: - print "The file is fine, no need to add a header." + print("The file is fine, no need to add a header.") return symbol = query_symbol_infos[0]["RawIdentifier"] # The header_infos is already sorted by clang-include-fixer. @@ -192,7 +194,7 @@ def main(): unique_headers.append(header) if not unique_headers: - print "Couldn't find a header for {0}.".format(symbol) + print("Couldn't find a header for {0}.".format(symbol)) return try: @@ -207,9 +209,9 @@ def main(): include_fixer_context["HeaderInfos"] = inserted_header_infos InsertHeaderToVimBuffer(include_fixer_context, text) - print "Added #include {0} for {1}.".format(selected, symbol) + print("Added #include {0} for {1}.".format(selected, symbol)) except Exception as error: - print >> sys.stderr, error.message + print(error, file=sys.stderr) return diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp index 91e8ebee13686..40aaf402ec0e1 100644 --- a/clang-tools-extra/clang-tidy/ClangTidy.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp @@ -314,10 +314,8 @@ ClangTidyASTConsumerFactory::ClangTidyASTConsumerFactory( IntrusiveRefCntPtr OverlayFS) : Context(Context), OverlayFS(OverlayFS), CheckFactories(new ClangTidyCheckFactories) { - for (ClangTidyModuleRegistry::iterator I = ClangTidyModuleRegistry::begin(), - E = ClangTidyModuleRegistry::end(); - I != E; ++I) { - std::unique_ptr Module(I->instantiate()); + for (ClangTidyModuleRegistry::entry E : ClangTidyModuleRegistry::entries()) { + std::unique_ptr Module = E.instantiate(); Module->addCheckFactories(*CheckFactories); } } diff --git a/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp index 7ca5c1e3454b1..8d4366b51a3ec 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp @@ -54,7 +54,7 @@ static bool isSurroundedRight(const Token &T) { /// Is given TokenKind a keyword? static bool isKeyword(const Token &T) { // FIXME: better matching of keywords to avoid false positives. - return T.isOneOf(tok::kw_case, tok::kw_const, tok::kw_struct); + return T.isOneOf(tok::kw_if, tok::kw_case, tok::kw_const, tok::kw_struct); } /// Warning is written when one of these operators are not within parentheses. 
diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp index d94731beba945..9b34f5ab55a7f 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp @@ -20,7 +20,8 @@ namespace bugprone { void SuspiciousSemicolonCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher( stmt(anyOf(ifStmt(hasThen(nullStmt().bind("semi")), - unless(hasElse(stmt()))), + unless(hasElse(stmt())), + unless(isConstexpr())), forStmt(hasBody(nullStmt().bind("semi"))), cxxForRangeStmt(hasBody(nullStmt().bind("semi"))), whileStmt(hasBody(nullStmt().bind("semi"))))) diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp index 2d4475c991ca2..c9313dbae96a5 100644 --- a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp @@ -14,11 +14,12 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" #include @@ -34,45 +35,270 @@ namespace modernize { namespace { enum BindArgumentKind { BK_Temporary, BK_Placeholder, BK_CallExpr, BK_Other }; +enum CaptureMode { CM_None, CM_ByRef, CM_ByValue, CM_InitExpression }; + +enum CallableType { + CT_Other, // unknown + CT_Function, // global or static function + CT_MemberFunction, // member function with implicit this + CT_Object, // object with operator() +}; + +enum CallableMaterializationKind { + CMK_Other, // unknown + CMK_Function, // callable is the name of a member or non-member function. + CMK_VariableRef, // callable is a simple expression involving a global or + // local variable. + CMK_CallExpression, // callable is obtained as the result of a call expression +}; struct BindArgument { - StringRef Tokens; + // A rough classification of the type of expression this argument was. BindArgumentKind Kind = BK_Other; + + // If this argument required a capture, a value indicating how it was + // captured. + CaptureMode CM = CM_None; + + // The exact spelling of this argument in the source code. + StringRef SourceTokens; + + // The identifier of the variable within the capture list. This may be + // different from UsageIdentifier for example in the expression *d, where the + // variable is captured as d, but referred to as *d. + std::string CaptureIdentifier; + + // If this is a placeholder or capture init expression, contains the tokens + // used to refer to this parameter from within the body of the lambda. + std::string UsageIdentifier; + + // If Kind == BK_Placeholder, the index of the placeholder. size_t PlaceHolderIndex = 0; + + // True if the argument is used inside the lambda, false otherwise. + bool IsUsed = false; + + // The actual Expr object representing this expression. 
+ const Expr *E = nullptr; +}; + +struct CallableInfo { + CallableType Type = CT_Other; + CallableMaterializationKind Materialization = CMK_Other; + CaptureMode CM = CM_None; + StringRef SourceTokens; + std::string CaptureIdentifier; + std::string UsageIdentifier; + StringRef CaptureInitializer; + const FunctionDecl *Decl = nullptr; +}; + +struct LambdaProperties { + CallableInfo Callable; + SmallVector BindArguments; + StringRef BindNamespace; + bool IsFixitSupported = false; }; } // end namespace +static const Expr *ignoreTemporariesAndPointers(const Expr *E) { + if (const auto *T = dyn_cast(E)) + return ignoreTemporariesAndPointers(T->getSubExpr()); + + const Expr *F = E->IgnoreImplicit(); + if (E != F) + return ignoreTemporariesAndPointers(F); + + return E; +} + +static const Expr *ignoreTemporariesAndConstructors(const Expr *E) { + if (const auto *T = dyn_cast(E)) + return ignoreTemporariesAndConstructors(T->getArg(0)); + + const Expr *F = E->IgnoreImplicit(); + if (E != F) + return ignoreTemporariesAndPointers(F); + + return E; +} + +static StringRef getSourceTextForExpr(const MatchFinder::MatchResult &Result, + const Expr *E) { + return Lexer::getSourceText( + CharSourceRange::getTokenRange(E->getBeginLoc(), E->getEndLoc()), + *Result.SourceManager, Result.Context->getLangOpts()); +} + +static bool isCallExprNamed(const Expr *E, StringRef Name) { + const auto *CE = dyn_cast(E->IgnoreImplicit()); + if (!CE) + return false; + const auto *ND = dyn_cast(CE->getCalleeDecl()); + if (!ND) + return false; + return ND->getQualifiedNameAsString() == Name; +} + +static void +initializeBindArgumentForCallExpr(const MatchFinder::MatchResult &Result, + BindArgument &B, const CallExpr *CE, + unsigned &CaptureIndex) { + // std::ref(x) means to capture x by reference. 
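+  // e.g. std::bind(f, std::ref(X)) must become [&X] { return f(X); };
+  // capturing X by value would silently change the bound call's semantics.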
+ if (isCallExprNamed(CE, "boost::ref") || isCallExprNamed(CE, "std::ref")) { + B.Kind = BK_Other; + B.CM = CM_ByRef; + B.UsageIdentifier = getSourceTextForExpr(Result, CE->getArg(0)); + } else { + B.Kind = BK_CallExpr; + B.CM = CM_InitExpression; + B.UsageIdentifier = "capture" + llvm::utostr(CaptureIndex++); + } + B.CaptureIdentifier = B.UsageIdentifier; +} + +static bool anyDescendantIsLocal(const Stmt *Statement) { + if (const auto *DeclRef = dyn_cast(Statement)) { + const ValueDecl *Decl = DeclRef->getDecl(); + if (const auto *Var = dyn_cast_or_null(Decl)) { + if (Var->isLocalVarDeclOrParm()) + return true; + } + } else if (isa(Statement)) + return true; + + return any_of(Statement->children(), anyDescendantIsLocal); +} + +static bool tryCaptureAsLocalVariable(const MatchFinder::MatchResult &Result, + BindArgument &B, const Expr *E) { + if (const auto *BTE = dyn_cast(E)) { + if (const auto *CE = dyn_cast(BTE->getSubExpr())) + return tryCaptureAsLocalVariable(Result, B, CE->getArg(0)); + return false; + } + + const auto *DRE = dyn_cast(E->IgnoreImplicit()); + if (!DRE) + return false; + + const auto *VD = dyn_cast(DRE->getDecl()); + if (!VD || !VD->isLocalVarDeclOrParm()) + return false; + + B.CM = CM_ByValue; + B.UsageIdentifier = getSourceTextForExpr(Result, E); + B.CaptureIdentifier = B.UsageIdentifier; + return true; +} + +static bool tryCaptureAsMemberVariable(const MatchFinder::MatchResult &Result, + BindArgument &B, const Expr *E) { + if (const auto *BTE = dyn_cast(E)) { + if (const auto *CE = dyn_cast(BTE->getSubExpr())) + return tryCaptureAsMemberVariable(Result, B, CE->getArg(0)); + return false; + } + + E = E->IgnoreImplicit(); + if (isa(E)) { + B.CM = CM_ByValue; + B.UsageIdentifier = getSourceTextForExpr(Result, E); + B.CaptureIdentifier = "this"; + return true; + } + + const auto *ME = dyn_cast(E); + if (!ME) + return false; + + if (!ME->isLValue() || !isa(ME->getMemberDecl())) + return false; + + B.CM = CM_ByValue; + B.UsageIdentifier = getSourceTextForExpr(Result, E); + B.CaptureIdentifier = "this"; + return true; +} + static SmallVector -buildBindArguments(const MatchFinder::MatchResult &Result, const CallExpr *C) { +buildBindArguments(const MatchFinder::MatchResult &Result, + const CallableInfo &Callable) { SmallVector BindArguments; llvm::Regex MatchPlaceholder("^_([0-9]+)$"); + const auto *BindCall = Result.Nodes.getNodeAs("bind"); + // Start at index 1 as first argument to bind is the function name. - for (size_t I = 1, ArgCount = C->getNumArgs(); I < ArgCount; ++I) { - const Expr *E = C->getArg(I); - BindArgument B; - if (const auto *M = dyn_cast(E)) { - const auto *TE = M->getSubExpr(); - B.Kind = isa(TE) ? 
BK_CallExpr : BK_Temporary; - } + unsigned CaptureIndex = 0; + for (size_t I = 1, ArgCount = BindCall->getNumArgs(); I < ArgCount; ++I) { + + const Expr *E = BindCall->getArg(I); + BindArgument &B = BindArguments.emplace_back(); + + size_t ArgIndex = I - 1; + if (Callable.Type == CT_MemberFunction) + --ArgIndex; + + bool IsObjectPtr = (I == 1 && Callable.Type == CT_MemberFunction); + B.E = E; + B.SourceTokens = getSourceTextForExpr(Result, E); - B.Tokens = Lexer::getSourceText( - CharSourceRange::getTokenRange(E->getBeginLoc(), E->getEndLoc()), - *Result.SourceManager, Result.Context->getLangOpts()); + if (!Callable.Decl || ArgIndex < Callable.Decl->getNumParams() || + IsObjectPtr) + B.IsUsed = true; SmallVector Matches; - if (B.Kind == BK_Other && MatchPlaceholder.match(B.Tokens, &Matches)) { + if (MatchPlaceholder.match(B.SourceTokens, &Matches)) { B.Kind = BK_Placeholder; B.PlaceHolderIndex = std::stoi(Matches[1]); + B.UsageIdentifier = "PH" + llvm::utostr(B.PlaceHolderIndex); + B.CaptureIdentifier = B.UsageIdentifier; + continue; + } + + if (const auto *CE = + dyn_cast(ignoreTemporariesAndConstructors(E))) { + initializeBindArgumentForCallExpr(Result, B, CE, CaptureIndex); + continue; + } + + if (tryCaptureAsLocalVariable(Result, B, B.E) || + tryCaptureAsMemberVariable(Result, B, B.E)) + continue; + + // If it's not something we recognize, capture it by init expression to be + // safe. + B.Kind = BK_Other; + if (IsObjectPtr) { + B.CM = CM_InitExpression; + B.UsageIdentifier = "ObjectPtr"; + B.CaptureIdentifier = B.UsageIdentifier; + } else if (anyDescendantIsLocal(B.E)) { + B.CM = CM_InitExpression; + B.CaptureIdentifier = "capture" + llvm::utostr(CaptureIndex++); + B.UsageIdentifier = B.CaptureIdentifier; } - BindArguments.push_back(B); } return BindArguments; } -static void addPlaceholderArgs(const ArrayRef Args, - llvm::raw_ostream &Stream) { +static int findPositionOfPlaceholderUse(ArrayRef Args, + size_t PlaceholderIndex) { + for (size_t I = 0; I < Args.size(); ++I) + if (Args[I].PlaceHolderIndex == PlaceholderIndex) + return I; + + return -1; +} + +static void addPlaceholderArgs(const LambdaProperties &LP, + llvm::raw_ostream &Stream, + bool PermissiveParameterList) { + + ArrayRef Args = LP.BindArguments; + auto MaxPlaceholderIt = std::max_element(Args.begin(), Args.end(), [](const BindArgument &B1, const BindArgument &B2) { @@ -80,27 +306,41 @@ static void addPlaceholderArgs(const ArrayRef Args, }); // Placeholders (if present) have index 1 or greater. 
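  // For example, std::bind(f, _2) still needs two lambda parameters, with
  // only the used one named: [](auto &&, auto && PH2) { return f(PH2); }.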
- if (MaxPlaceholderIt == Args.end() || MaxPlaceholderIt->PlaceHolderIndex == 0) + if (!PermissiveParameterList && (MaxPlaceholderIt == Args.end() || + MaxPlaceholderIt->PlaceHolderIndex == 0)) return; size_t PlaceholderCount = MaxPlaceholderIt->PlaceHolderIndex; Stream << "("; StringRef Delimiter = ""; for (size_t I = 1; I <= PlaceholderCount; ++I) { - Stream << Delimiter << "auto && arg" << I; + Stream << Delimiter << "auto &&"; + + int ArgIndex = findPositionOfPlaceholderUse(Args, I); + + if (ArgIndex != -1 && Args[ArgIndex].IsUsed) + Stream << " " << Args[ArgIndex].UsageIdentifier; Delimiter = ", "; } + if (PermissiveParameterList) + Stream << Delimiter << "auto && ..."; Stream << ")"; } -static void addFunctionCallArgs(const ArrayRef Args, +static void addFunctionCallArgs(ArrayRef Args, llvm::raw_ostream &Stream) { StringRef Delimiter = ""; - for (const auto &B : Args) { - if (B.PlaceHolderIndex) - Stream << Delimiter << "arg" << B.PlaceHolderIndex; - else - Stream << Delimiter << B.Tokens; + + for (int I = 0, Size = Args.size(); I < Size; ++I) { + const BindArgument &B = Args[I]; + + Stream << Delimiter; + + if (B.Kind == BK_Placeholder || B.CM != CM_None) + Stream << B.UsageIdentifier; + else if (B.CM == CM_None) + Stream << B.SourceTokens; + Delimiter = ", "; } } @@ -116,59 +356,301 @@ static bool isPlaceHolderIndexRepeated(const ArrayRef Args) { return false; } +static std::vector +findCandidateCallOperators(const CXXRecordDecl *RecordDecl, size_t NumArgs) { + std::vector Candidates; + + for (const clang::CXXMethodDecl *Method : RecordDecl->methods()) { + OverloadedOperatorKind OOK = Method->getOverloadedOperator(); + + if (OOK != OverloadedOperatorKind::OO_Call) + continue; + + if (Method->getNumParams() > NumArgs) + continue; + + Candidates.push_back(Method); + } + + return Candidates; +} + +static bool isFixitSupported(const CallableInfo &Callee, + ArrayRef Args) { + // Do not attempt to create fixits for nested std::bind or std::ref. + // Supporting nested std::bind will be more difficult due to placeholder + // sharing between outer and inner std::bind invocations, and std::ref + // requires us to capture some parameters by reference instead of by value. + if (any_of(Args, [](const BindArgument &B) { + return isCallExprNamed(B.E, "boost::bind") || + isCallExprNamed(B.E, "std::bind"); + })) { + return false; + } + + // Do not attempt to create fixits when placeholders are reused. + // Unused placeholders are supported by requiring C++14 generic lambdas. + // FIXME: Support this case by deducing the common type. + if (isPlaceHolderIndexRepeated(Args)) + return false; + + // If we can't determine the Decl being used, don't offer a fixit. 
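+  // Without it we cannot tell how many of the bound arguments the callee
+  // actually consumes (IsUsed above), or name a member function in the body.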
+ if (!Callee.Decl) + return false; + + if (Callee.Type == CT_Other || Callee.Materialization == CMK_Other) + return false; + + return true; +} + +const FunctionDecl *getCallOperator(const CXXRecordDecl *Callable, + size_t NumArgs) { + std::vector Candidates = + findCandidateCallOperators(Callable, NumArgs); + if (Candidates.size() != 1) + return nullptr; + + return Candidates.front(); +} + +const FunctionDecl * +getCallMethodDecl(const MatchFinder::MatchResult &Result, CallableType Type, + CallableMaterializationKind Materialization) { + + const Expr *Callee = Result.Nodes.getNodeAs("ref"); + const Expr *CallExpression = ignoreTemporariesAndPointers(Callee); + + if (Type == CT_Object) { + const auto *BindCall = Result.Nodes.getNodeAs("bind"); + size_t NumArgs = BindCall->getNumArgs() - 1; + return getCallOperator(Callee->getType()->getAsCXXRecordDecl(), NumArgs); + } + + if (Materialization == CMK_Function) { + if (const auto *DRE = dyn_cast(CallExpression)) + return dyn_cast(DRE->getDecl()); + } + + // Maybe this is an indirect call through a function pointer or something + // where we can't determine the exact decl. + return nullptr; +} + +static CallableType getCallableType(const MatchFinder::MatchResult &Result) { + const auto *CallableExpr = Result.Nodes.getNodeAs("ref"); + + QualType QT = CallableExpr->getType(); + if (QT->isMemberFunctionPointerType()) + return CT_MemberFunction; + + if (QT->isFunctionPointerType() || QT->isFunctionReferenceType() || + QT->isFunctionType()) + return CT_Function; + + if (QT->isRecordType()) { + const CXXRecordDecl *Decl = QT->getAsCXXRecordDecl(); + if (!Decl) + return CT_Other; + + return CT_Object; + } + + return CT_Other; +} + +static CallableMaterializationKind +getCallableMaterialization(const MatchFinder::MatchResult &Result) { + const auto *CallableExpr = Result.Nodes.getNodeAs("ref"); + + const auto *NoTemporaries = ignoreTemporariesAndPointers(CallableExpr); + + if (isa(NoTemporaries)) + return CMK_CallExpression; + + if (isa(NoTemporaries) || + isa(NoTemporaries)) + return CMK_Function; + + if (const auto *DRE = dyn_cast(NoTemporaries)) { + if (isa(DRE->getDecl())) + return CMK_Function; + if (isa(DRE->getDecl())) + return CMK_VariableRef; + } + + return CMK_Other; +} + +static LambdaProperties +getLambdaProperties(const MatchFinder::MatchResult &Result) { + const auto *CalleeExpr = Result.Nodes.getNodeAs("ref"); + + LambdaProperties LP; + + const auto *Bind = Result.Nodes.getNodeAs("bind"); + const auto *Decl = dyn_cast(Bind->getCalleeDecl()); + const auto *NS = + dyn_cast(Decl->getEnclosingNamespaceContext()); + while (NS->isInlineNamespace()) + NS = dyn_cast(NS->getDeclContext()); + LP.BindNamespace = NS->getName(); + + LP.Callable.Type = getCallableType(Result); + LP.Callable.Materialization = getCallableMaterialization(Result); + LP.Callable.Decl = + getCallMethodDecl(Result, LP.Callable.Type, LP.Callable.Materialization); + LP.Callable.SourceTokens = getSourceTextForExpr(Result, CalleeExpr); + if (LP.Callable.Materialization == CMK_VariableRef) { + LP.Callable.CM = CM_ByValue; + LP.Callable.UsageIdentifier = getSourceTextForExpr(Result, CalleeExpr); + LP.Callable.CaptureIdentifier = + getSourceTextForExpr(Result, ignoreTemporariesAndPointers(CalleeExpr)); + } else if (LP.Callable.Materialization == CMK_CallExpression) { + LP.Callable.CM = CM_InitExpression; + LP.Callable.UsageIdentifier = "Func"; + LP.Callable.CaptureIdentifier = "Func"; + LP.Callable.CaptureInitializer = getSourceTextForExpr(Result, CalleeExpr); + } + + 
LP.BindArguments = buildBindArguments(Result, LP.Callable); + + LP.IsFixitSupported = isFixitSupported(LP.Callable, LP.BindArguments); + + return LP; +} + +static bool emitCapture(llvm::StringSet<> &CaptureSet, StringRef Delimiter, + CaptureMode CM, StringRef Identifier, + StringRef InitExpression, raw_ostream &Stream) { + if (CM == CM_None) + return false; + + // This capture has already been emitted. + if (CaptureSet.count(Identifier) != 0) + return false; + + Stream << Delimiter; + + if (CM == CM_ByRef) + Stream << "&"; + Stream << Identifier; + if (CM == CM_InitExpression) + Stream << " = " << InitExpression; + + CaptureSet.insert(Identifier); + return true; +} + +static void emitCaptureList(const LambdaProperties &LP, + const MatchFinder::MatchResult &Result, + raw_ostream &Stream) { + llvm::StringSet<> CaptureSet; + bool AnyCapturesEmitted = false; + + AnyCapturesEmitted = emitCapture(CaptureSet, "", LP.Callable.CM, + LP.Callable.CaptureIdentifier, + LP.Callable.CaptureInitializer, Stream); + + for (const BindArgument &B : LP.BindArguments) { + if (B.CM == CM_None || !B.IsUsed) + continue; + + StringRef Delimiter = AnyCapturesEmitted ? ", " : ""; + + if (emitCapture(CaptureSet, Delimiter, B.CM, B.CaptureIdentifier, + B.SourceTokens, Stream)) + AnyCapturesEmitted = true; + } +} + +static ArrayRef +getForwardedArgumentList(const LambdaProperties &P) { + ArrayRef Args = makeArrayRef(P.BindArguments); + if (P.Callable.Type != CT_MemberFunction) + return Args; + + return Args.drop_front(); +} +AvoidBindCheck::AvoidBindCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + PermissiveParameterList(Options.get("PermissiveParameterList", 0) != 0) {} + void AvoidBindCheck::registerMatchers(MatchFinder *Finder) { if (!getLangOpts().CPlusPlus14) // Need C++14 for generic lambdas. return; Finder->addMatcher( callExpr( - callee(namedDecl(hasName("::std::bind"))), - hasArgument(0, declRefExpr(to(functionDecl().bind("f"))).bind("ref"))) + callee(namedDecl( + anyOf(hasName("::boost::bind"), hasName("::std::bind")))), + hasArgument( + 0, anyOf(expr(hasType(memberPointerType())).bind("ref"), + expr(hasParent(materializeTemporaryExpr().bind("ref"))), + expr().bind("ref")))) .bind("bind"), this); } void AvoidBindCheck::check(const MatchFinder::MatchResult &Result) { const auto *MatchedDecl = Result.Nodes.getNodeAs("bind"); - auto Diag = diag(MatchedDecl->getBeginLoc(), "prefer a lambda to std::bind"); - - const auto Args = buildBindArguments(Result, MatchedDecl); - // Do not attempt to create fixits for nested call expressions. - // FIXME: Create lambda capture variables to capture output of calls. - // NOTE: Supporting nested std::bind will be more difficult due to placeholder - // sharing between outer and inner std:bind invocations. - if (llvm::any_of(Args, - [](const BindArgument &B) { return B.Kind == BK_CallExpr; })) - return; - - // Do not attempt to create fixits when placeholders are reused. - // Unused placeholders are supported by requiring C++14 generic lambdas. - // FIXME: Support this case by deducing the common type. - if (isPlaceHolderIndexRepeated(Args)) + LambdaProperties LP = getLambdaProperties(Result); + auto Diag = + diag(MatchedDecl->getBeginLoc(), + formatv("prefer a lambda to {0}::bind", LP.BindNamespace).str()); + if (!LP.IsFixitSupported) return; - const auto *F = Result.Nodes.getNodeAs("f"); - - // std::bind can support argument count mismatch between its arguments and the - // bound function's arguments. 
Do not attempt to generate a fixit for such - // cases. - // FIXME: Support this case by creating unused lambda capture variables. - if (F->getNumParams() != Args.size()) - return; + const auto *Ref = Result.Nodes.getNodeAs("ref"); std::string Buffer; llvm::raw_string_ostream Stream(Buffer); - bool HasCapturedArgument = llvm::any_of( - Args, [](const BindArgument &B) { return B.Kind == BK_Other; }); - const auto *Ref = Result.Nodes.getNodeAs("ref"); - Stream << "[" << (HasCapturedArgument ? "=" : "") << "]"; - addPlaceholderArgs(Args, Stream); - Stream << " { return "; - Ref->printPretty(Stream, nullptr, Result.Context->getPrintingPolicy()); + Stream << "["; + emitCaptureList(LP, Result, Stream); + Stream << "]"; + + ArrayRef FunctionCallArgs = makeArrayRef(LP.BindArguments); + + addPlaceholderArgs(LP, Stream, PermissiveParameterList); + + if (LP.Callable.Type == CT_Function) { + StringRef SourceTokens = LP.Callable.SourceTokens; + SourceTokens.consume_front("&"); + Stream << " { return " << SourceTokens; + } else if (LP.Callable.Type == CT_MemberFunction) { + const auto *MethodDecl = dyn_cast(LP.Callable.Decl); + const BindArgument &ObjPtr = FunctionCallArgs.front(); + + Stream << " { "; + if (!isa(ignoreTemporariesAndPointers(ObjPtr.E))) { + Stream << ObjPtr.UsageIdentifier; + Stream << "->"; + } + + Stream << MethodDecl->getName(); + } else { + Stream << " { return "; + switch (LP.Callable.CM) { + case CM_ByValue: + case CM_ByRef: + if (LP.Callable.UsageIdentifier != LP.Callable.CaptureIdentifier) { + Stream << "(" << LP.Callable.UsageIdentifier << ")"; + break; + } + LLVM_FALLTHROUGH; + case CM_InitExpression: + Stream << LP.Callable.UsageIdentifier; + break; + default: + Ref->printPretty(Stream, nullptr, Result.Context->getPrintingPolicy()); + } + } + Stream << "("; - addFunctionCallArgs(Args, Stream); + + addFunctionCallArgs(getForwardedArgumentList(LP), Stream); Stream << "); }"; Diag << FixItHint::CreateReplacement(MatchedDecl->getSourceRange(), diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h index 4b393303b7ef0..5576fe6c3bd5d 100644 --- a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h @@ -23,10 +23,12 @@ namespace modernize { /// http://clang.llvm.org/extra/clang-tidy/checks/modernize-avoid-std-bind.html class AvoidBindCheck : public ClangTidyCheck { public: - AvoidBindCheck(StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context) {} + AvoidBindCheck(StringRef Name, ClangTidyContext *Context); void registerMatchers(ast_matchers::MatchFinder *Finder) override; void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + +private: + bool PermissiveParameterList = false; }; } // namespace modernize } // namespace tidy diff --git a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp index a2a56241e8ab6..eb3d7c505b831 100644 --- a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp @@ -19,44 +19,6 @@ namespace clang { namespace tidy { namespace readability { -namespace { -class NamespaceCommentPPCallbacks : public PPCallbacks { -public: - NamespaceCommentPPCallbacks(Preprocessor *PP, NamespaceCommentCheck *Check) - : PP(PP), Check(Check) {} - - void MacroDefined(const Token &MacroNameTok, const MacroDirective *MD) { - // Record all 
defined macros. We store the whole token to compare names - // later. - - const MacroInfo * MI = MD->getMacroInfo(); - - if (MI->isFunctionLike()) - return; - - std::string ValueBuffer; - llvm::raw_string_ostream Value(ValueBuffer); - - SmallString<128> SpellingBuffer; - bool First = true; - for (const auto &T : MI->tokens()) { - if (!First && T.hasLeadingSpace()) - Value << ' '; - - Value << PP->getSpelling(T, SpellingBuffer); - First = false; - } - - Check->addMacro(MacroNameTok.getIdentifierInfo()->getName().str(), - Value.str()); - } - -private: - Preprocessor *PP; - NamespaceCommentCheck *Check; -}; -} // namespace - NamespaceCommentCheck::NamespaceCommentCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), @@ -78,37 +40,24 @@ void NamespaceCommentCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher(namespaceDecl().bind("namespace"), this); } -void NamespaceCommentCheck::registerPPCallbacks( - const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) { - PP->addPPCallbacks(std::make_unique(PP, this)); -} - static bool locationsInSameFile(const SourceManager &Sources, SourceLocation Loc1, SourceLocation Loc2) { return Loc1.isFileID() && Loc2.isFileID() && Sources.getFileID(Loc1) == Sources.getFileID(Loc2); } -std::string NamespaceCommentCheck::getNamespaceComment(const NamespaceDecl *ND, - bool InsertLineBreak) { +static std::string getNamespaceComment(const NamespaceDecl *ND, + bool InsertLineBreak) { std::string Fix = "// namespace"; - if (!ND->isAnonymousNamespace()) { - bool IsNamespaceMacroExpansion; - StringRef MacroDefinition; - std::tie(IsNamespaceMacroExpansion, MacroDefinition) = - isNamespaceMacroExpansion(ND->getName()); - - Fix.append(" ").append(IsNamespaceMacroExpansion ? MacroDefinition - : ND->getName()); - } + if (!ND->isAnonymousNamespace()) + Fix.append(" ").append(ND->getNameAsString()); if (InsertLineBreak) Fix.append("\n"); return Fix; } -std::string -NamespaceCommentCheck::getNamespaceComment(const std::string &NameSpaceName, - bool InsertLineBreak) { +static std::string getNamespaceComment(const std::string &NameSpaceName, + bool InsertLineBreak) { std::string Fix = "// namespace "; Fix.append(NameSpaceName); if (InsertLineBreak) @@ -116,32 +65,6 @@ NamespaceCommentCheck::getNamespaceComment(const std::string &NameSpaceName, return Fix; } -void NamespaceCommentCheck::addMacro(const std::string &Name, - const std::string &Value) noexcept { - Macros.emplace_back(Name, Value); -} - -bool NamespaceCommentCheck::isNamespaceMacroDefinition( - const StringRef NameSpaceName) { - return llvm::any_of(Macros, [&NameSpaceName](const auto &Macro) { - return NameSpaceName == Macro.first; - }); -} - -std::tuple NamespaceCommentCheck::isNamespaceMacroExpansion( - const StringRef NameSpaceName) { - const auto &MacroIt = - llvm::find_if(Macros, [&NameSpaceName](const auto &Macro) { - return NameSpaceName == Macro.second; - }); - - const bool IsNamespaceMacroExpansion = Macros.end() != MacroIt; - - return std::make_tuple(IsNamespaceMacroExpansion, - IsNamespaceMacroExpansion ? StringRef(MacroIt->first) - : NameSpaceName); -} - void NamespaceCommentCheck::check(const MatchFinder::MatchResult &Result) { const auto *ND = Result.Nodes.getNodeAs("namespace"); const SourceManager &Sources = *Result.SourceManager; @@ -220,48 +143,28 @@ void NamespaceCommentCheck::check(const MatchFinder::MatchResult &Result) { StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : ""; StringRef Anonymous = Groups.size() > 3 ? 
Groups[3] : ""; - // Don't allow to use macro expansion in closing comment. - // FIXME: Use Structured Bindings once C++17 features will be enabled. - bool IsNamespaceMacroExpansion; - StringRef MacroDefinition; - std::tie(IsNamespaceMacroExpansion, MacroDefinition) = - isNamespaceMacroExpansion(NamespaceNameInComment); - if (IsNested && NestedNamespaceName == NamespaceNameInComment) { // C++17 nested namespace. return; } else if ((ND->isAnonymousNamespace() && NamespaceNameInComment.empty()) || - (((ND->getNameAsString() == NamespaceNameInComment) && - Anonymous.empty()) && - !IsNamespaceMacroExpansion)) { + (ND->getNameAsString() == NamespaceNameInComment && + Anonymous.empty())) { // Check if the namespace in the comment is the same. // FIXME: Maybe we need a strict mode, where we always fix namespace // comments with different format. return; } - // Allow using macro definitions in closing comment. - if (isNamespaceMacroDefinition(NamespaceNameInComment)) - return; - // Otherwise we need to fix the comment. NeedLineBreak = Comment.startswith("/*"); OldCommentRange = SourceRange(AfterRBrace, Loc.getLocWithOffset(Tok.getLength())); - - if (IsNamespaceMacroExpansion) { - Message = (llvm::Twine("%0 ends with a comment that refers to an " - "expansion of macro")) - .str(); - NestedNamespaceName = MacroDefinition; - } else { - Message = (llvm::Twine("%0 ends with a comment that refers to a " - "wrong namespace '") + - NamespaceNameInComment + "'") - .str(); - } - + Message = + (llvm::Twine( + "%0 ends with a comment that refers to a wrong namespace '") + + NamespaceNameInComment + "'") + .str(); } else if (Comment.startswith("//")) { // Assume that this is an unrecognized form of a namespace closing line // comment. Replace it. @@ -274,16 +177,6 @@ void NamespaceCommentCheck::check(const MatchFinder::MatchResult &Result) { // multi-line or there may be other tokens behind it. } - // Print Macro definition instead of expansion. - // FIXME: Use Structured Bindings once C++17 features will be enabled. - bool IsNamespaceMacroExpansion; - StringRef MacroDefinition; - std::tie(IsNamespaceMacroExpansion, MacroDefinition) = - isNamespaceMacroExpansion(NestedNamespaceName); - - if (IsNamespaceMacroExpansion) - NestedNamespaceName = MacroDefinition; - std::string NamespaceName = ND->isAnonymousNamespace() ? 
"anonymous namespace" diff --git a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.h b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.h index bc5c11e7b71b9..712cd4662965e 100644 --- a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.h +++ b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.h @@ -26,29 +26,14 @@ class NamespaceCommentCheck : public ClangTidyCheck { NamespaceCommentCheck(StringRef Name, ClangTidyContext *Context); void registerMatchers(ast_matchers::MatchFinder *Finder) override; void check(const ast_matchers::MatchFinder::MatchResult &Result) override; - void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, - Preprocessor *ModuleExpanderPP) override; - - void addMacro(const std::string &Name, const std::string &Value) noexcept; private: void storeOptions(ClangTidyOptions::OptionMap &Options) override; - std::string getNamespaceComment(const NamespaceDecl *ND, - bool InsertLineBreak); - std::string getNamespaceComment(const std::string &NameSpaceName, - bool InsertLineBreak); - bool isNamespaceMacroDefinition(const StringRef NameSpaceName); - std::tuple - isNamespaceMacroExpansion(const StringRef NameSpaceName); llvm::Regex NamespaceCommentPattern; const unsigned ShortNamespaceLines; const unsigned SpacesBeforeComments; llvm::SmallVector Ends; - - // Store macros to verify that warning is not thrown when namespace name is a - // preprocessed define. - std::vector> Macros; }; } // namespace readability diff --git a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp index df83de856238f..ad6182def20d2 100644 --- a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp +++ b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp @@ -18,6 +18,7 @@ #include "../ClangTidyForceLinker.h" #include "../GlobList.h" #include "clang/Tooling/CommonOptionsParser.h" +#include "llvm/Support/InitLLVM.h" #include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include "llvm/Support/TargetSelect.h" @@ -327,7 +328,7 @@ getVfsFromFile(const std::string &OverlayFile, } static int clangTidyMain(int argc, const char **argv) { - llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + llvm::InitLLVM X(argc, argv); CommonOptionsParser OptionsParser(argc, argv, ClangTidyCategory, cl::ZeroOrMore); llvm::IntrusiveRefCntPtr BaseFS( diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 8ab2ae6b91d3a..c1aea3bd119d1 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -69,6 +69,7 @@ add_clang_library(clangDaemon Selection.cpp SemanticHighlighting.cpp SemanticSelection.cpp + Shutdown.cpp SourceCode.cpp QueryDriverDatabase.cpp Threading.cpp diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 4fe8158180749..57ed97f7a7825 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -103,13 +103,13 @@ std::vector> buildHighlightScopeLookupTable() { return LookupTable; } -// Makes sure edits in \p E are applicable to latest file contents reported by +// Makes sure edits in \p FE are applicable to latest file contents reported by // editor. If not generates an error message containing information about files // that needs to be saved. 
-llvm::Error validateEdits(const DraftStore &DraftMgr, const Tweak::Effect &E) { +llvm::Error validateEdits(const DraftStore &DraftMgr, const FileEdits &FE) { size_t InvalidFileCount = 0; llvm::StringRef LastInvalidFile; - for (const auto &It : E.ApplyEdits) { + for (const auto &It : FE) { if (auto Draft = DraftMgr.getDraft(It.first())) { // If the file is open in user's editor, make sure the version we // saw and current version are compatible as this is the text that @@ -704,7 +704,7 @@ void ClangdLSPServer::onCommand(const ExecuteCommandParams &Params, if (R->ApplyEdits.empty()) return Reply("Tweak applied."); - if (auto Err = validateEdits(DraftMgr, *R)) + if (auto Err = validateEdits(DraftMgr, R->ApplyEdits)) return Reply(std::move(Err)); WorkspaceEdit WE; @@ -758,17 +758,23 @@ void ClangdLSPServer::onRename(const RenameParams &Params, if (!Code) return Reply(llvm::make_error( "onRename called for non-added file", ErrorCode::InvalidParams)); - - Server->rename(File, Params.position, Params.newName, /*WantFormat=*/true, - [File, Code, Params, Reply = std::move(Reply)]( - llvm::Expected> Edits) mutable { - if (!Edits) - return Reply(Edits.takeError()); - - WorkspaceEdit WE; - WE.changes = {{Params.textDocument.uri.uri(), *Edits}}; - Reply(WE); - }); + Server->rename( + File, Params.position, Params.newName, + /*WantFormat=*/true, + [File, Params, Reply = std::move(Reply), + this](llvm::Expected Edits) mutable { + if (!Edits) + return Reply(Edits.takeError()); + if (auto Err = validateEdits(DraftMgr, *Edits)) + return Reply(std::move(Err)); + WorkspaceEdit Result; + Result.changes.emplace(); + for (const auto &Rep : *Edits) { + (*Result.changes)[URI::createFile(Rep.first()).toString()] = + Rep.second.asTextEdits(); + } + Reply(Result); + }); } void ClangdLSPServer::onDocumentDidClose( diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 5a9833d78b48e..e9e03dbc37426 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -119,7 +119,8 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB, : nullptr), GetClangTidyOptions(Opts.GetClangTidyOptions), SuggestMissingIncludes(Opts.SuggestMissingIncludes), - TweakFilter(Opts.TweakFilter), WorkspaceRoot(Opts.WorkspaceRoot), + CrossFileRename(Opts.CrossFileRename), TweakFilter(Opts.TweakFilter), + WorkspaceRoot(Opts.WorkspaceRoot), // Pass a callback into `WorkScheduler` to extract symbols from a newly // parsed file and rebuild the file index synchronously each time an AST // is parsed. @@ -308,54 +309,68 @@ void ClangdServer::prepareRename(PathRef File, Position Pos, if (!InpAST) return CB(InpAST.takeError()); auto &AST = InpAST->AST; - // Performing the rename isn't substantially more expensive than doing an - // AST-based check, so we just rename and throw away the results. We may - // have to revisit this when we support cross-file rename. - auto Changes = renameWithinFile(AST, File, Pos, "dummy", Index); + const auto &SM = AST.getSourceManager(); + SourceLocation Loc = + SM.getMacroArgExpandedLocation(getBeginningOfIdentifier( + Pos, AST.getSourceManager(), AST.getLangOpts())); + auto Range = getTokenRange(SM, AST.getLangOpts(), Loc); + if (!Range) + return CB(llvm::None); // "rename" is not valid at the position. + + if (CrossFileRename) + // FIXME: we now assume cross-file rename always succeeds, revisit this. 
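+    // Under LSP, prepareRename only has to report the range (and optional
+    // placeholder) to highlight; the rename itself is validated once the
+    // follow-up textDocument/rename request arrives.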
+ return CB(*Range); + + // Performing the local rename isn't substantially more expensive than + // doing an AST-based check, so we just rename and throw away the results. + auto Changes = clangd::rename({Pos, "dummy", AST, File, Index, + /*AllowCrossFile=*/false, + /*GetDirtyBuffer=*/nullptr}); if (!Changes) { // LSP says to return null on failure, but that will result in a generic // failure message. If we send an LSP error response, clients can surface // the message to users (VSCode does). return CB(Changes.takeError()); } - SourceLocation Loc = getBeginningOfIdentifier( - Pos, AST.getSourceManager(), AST.getASTContext().getLangOpts()); - if (auto Range = getTokenRange(AST.getSourceManager(), - AST.getASTContext().getLangOpts(), Loc)) - return CB(*Range); - // Return null if the "rename" is not valid at the position. - CB(llvm::None); + return CB(*Range); }; WorkScheduler.runWithAST("PrepareRename", File, std::move(Action)); } void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName, - bool WantFormat, Callback> CB) { + bool WantFormat, Callback CB) { + // A snapshot of all file dirty buffers. + llvm::StringMap Snapshot = WorkScheduler.getAllFileContents(); auto Action = [File = File.str(), NewName = NewName.str(), Pos, WantFormat, - CB = std::move(CB), + CB = std::move(CB), Snapshot = std::move(Snapshot), this](llvm::Expected InpAST) mutable { if (!InpAST) return CB(InpAST.takeError()); - auto Changes = renameWithinFile(InpAST->AST, File, Pos, NewName, Index); - if (!Changes) - return CB(Changes.takeError()); + auto GetDirtyBuffer = + [&Snapshot](PathRef AbsPath) -> llvm::Optional { + auto It = Snapshot.find(AbsPath); + if (It == Snapshot.end()) + return llvm::None; + return It->second; + }; + auto Edits = clangd::rename({Pos, NewName, InpAST->AST, File, Index, + CrossFileRename, GetDirtyBuffer}); + if (!Edits) + return CB(Edits.takeError()); if (WantFormat) { auto Style = getFormatStyleForFile(File, InpAST->Inputs.Contents, InpAST->Inputs.FS.get()); - if (auto Formatted = - cleanupAndFormat(InpAST->Inputs.Contents, *Changes, Style)) - *Changes = std::move(*Formatted); - else - elog("Failed to format replacements: {0}", Formatted.takeError()); - } + llvm::Error Err = llvm::Error::success(); + for (auto &E : *Edits) + Err = + llvm::joinErrors(reformatEdit(E.getValue(), Style), std::move(Err)); - std::vector Edits; - for (const auto &Rep : *Changes) - Edits.push_back(replacementToEdit(InpAST->Inputs.Contents, Rep)); - return CB(std::move(Edits)); + if (Err) + return CB(std::move(Err)); + } + return CB(std::move(*Edits)); }; - WorkScheduler.runWithAST("Rename", File, std::move(Action)); } diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index cd0b91c08f084..499340808765b 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -24,6 +24,7 @@ #include "index/Background.h" #include "index/FileIndex.h" #include "index/Index.h" +#include "refactor/Rename.h" #include "refactor/Tweak.h" #include "clang/Tooling/CompilationDatabase.h" #include "clang/Tooling/Core/Replacement.h" @@ -133,6 +134,9 @@ class ClangdServer { /// Enable semantic highlighting features. bool SemanticHighlighting = false; + /// Enable cross-file rename feature. + bool CrossFileRename = false; + /// Returns true if the tweak should be enabled. std::function TweakFilter = [](const Tweak &T) { return !T.hidden(); // only enable non-hidden tweaks. 
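ClangdServer::rename above snapshots every dirty buffer before scheduling the action, then hands the action a lookup callback, so the asynchronous rename never reads live editor state. A minimal sketch of that snapshot-callback pattern (DirtyBufferGetter and makeSnapshotGetter are illustrative names, not clangd's API):

```cpp
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <functional>
#include <string>
#include <utility>

using DirtyBufferGetter =
    std::function<llvm::Optional<std::string>(llvm::StringRef Path)>;

// Copy the open-file contents once, up front; the returned callback keeps
// answering from that immutable snapshot even if the user edits while the
// rename action sits in the queue.
DirtyBufferGetter makeSnapshotGetter(llvm::StringMap<std::string> Snapshot) {
  return [Snapshot = std::move(Snapshot)](
             llvm::StringRef Path) -> llvm::Optional<std::string> {
    auto It = Snapshot.find(Path);
    if (It == Snapshot.end())
      return llvm::None;
    return It->second;
  };
}
```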
@@ -252,7 +256,7 @@ class ClangdServer { /// embedders could use this method to get all occurrences of the symbol (e.g. /// highlighting them in prepare stage). void rename(PathRef File, Position Pos, llvm::StringRef NewName, - bool WantFormat, Callback> CB); + bool WantFormat, Callback CB); struct TweakRef { std::string ID; /// ID to pass for applyTweak. @@ -327,6 +331,8 @@ class ClangdServer { // can be caused by missing includes (e.g. member access in incomplete type). bool SuggestMissingIncludes = false; + bool CrossFileRename = false; + std::function TweakFilter; // GUARDED_BY(CachedCompletionFuzzyFindRequestMutex) diff --git a/clang-tools-extra/clangd/Compiler.cpp b/clang-tools-extra/clangd/Compiler.cpp index 795fd0082594d..eae753b5c9b36 100644 --- a/clang-tools-extra/clangd/Compiler.cpp +++ b/clang-tools-extra/clangd/Compiler.cpp @@ -42,7 +42,8 @@ void IgnoreDiagnostics::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, std::unique_ptr buildCompilerInvocation(const ParseInputs &Inputs, - clang::DiagnosticConsumer &D) { + clang::DiagnosticConsumer &D, + std::vector *CC1Args) { std::vector ArgStrs; for (const auto &S : Inputs.CompileCommand.CommandLine) ArgStrs.push_back(S.c_str()); @@ -57,7 +58,7 @@ buildCompilerInvocation(const ParseInputs &Inputs, CompilerInstance::createDiagnostics(new DiagnosticOptions, &D, false); std::unique_ptr CI = createInvocationFromCommandLine( ArgStrs, CommandLineDiagsEngine, Inputs.FS, - /*ShouldRecoverOnErrors=*/true); + /*ShouldRecoverOnErrors=*/true, CC1Args); if (!CI) return nullptr; // createInvocationFromCommandLine sets DisableFree. diff --git a/clang-tools-extra/clangd/Compiler.h b/clang-tools-extra/clangd/Compiler.h index 6ab1b0f075f93..51414c37fc042 100644 --- a/clang-tools-extra/clangd/Compiler.h +++ b/clang-tools-extra/clangd/Compiler.h @@ -52,8 +52,8 @@ struct ParseInputs { /// Builds compiler invocation that could be used to build AST or preamble. std::unique_ptr -buildCompilerInvocation(const ParseInputs &Inputs, - clang::DiagnosticConsumer &D); +buildCompilerInvocation(const ParseInputs &Inputs, clang::DiagnosticConsumer &D, + std::vector *CC1Args = nullptr); /// Creates a compiler instance, configured so that: /// - Contents of the parsed file are remapped to \p MainFile. diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index c536cbf75e5c0..3e55a6a9cdc68 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -175,6 +175,9 @@ struct TargetFinder { RelSet Flags; Visitor(TargetFinder &Outer, RelSet Flags) : Outer(Outer), Flags(Flags) {} + void VisitCallExpr(const CallExpr *CE) { + Outer.add(CE->getCalleeDecl(), Flags); + } void VisitDeclRefExpr(const DeclRefExpr *DRE) { const Decl *D = DRE->getDecl(); // UsingShadowDecl allows us to record the UsingDecl. diff --git a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp index 698f2460fea57..f90e46a24f329 100644 --- a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp +++ b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp @@ -97,7 +97,7 @@ llvm::Optional getCorrespondingHeaderOrSource(const Path &OriginalFile, // // For each symbol in the original file, we get its target location (decl or // def) from the index, then award that target file. 
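 // The candidate file that accumulates the most awards becomes the result.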
- bool IsHeader = isHeaderFile(OriginalFile, AST.getASTContext().getLangOpts()); + bool IsHeader = isHeaderFile(OriginalFile, AST.getLangOpts()); Index->lookup(Request, [&](const Symbol &Sym) { if (IsHeader) AwardTarget(Sym.Definition.FileURI); diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index c14ff1b3fe631..9053bc08b4ec3 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -367,8 +367,7 @@ HoverInfo getHoverContents(const DefinedMacro &Macro, ParsedAST &AST) { SourceLocation StartLoc = Macro.Info->getDefinitionLoc(); SourceLocation EndLoc = Macro.Info->getDefinitionEndLoc(); if (EndLoc.isValid()) { - EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, SM, - AST.getASTContext().getLangOpts()); + EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, SM, AST.getLangOpts()); bool Invalid; StringRef Buffer = SM.getBufferData(SM.getFileID(StartLoc), &Invalid); if (!Invalid) { @@ -391,7 +390,7 @@ llvm::Optional getHover(ParsedAST &AST, Position Pos, const SourceManager &SM = AST.getSourceManager(); llvm::Optional HI; SourceLocation SourceLocationBeg = SM.getMacroArgExpandedLocation( - getBeginningOfIdentifier(Pos, SM, AST.getASTContext().getLangOpts())); + getBeginningOfIdentifier(Pos, SM, AST.getLangOpts())); if (auto Deduced = getDeducedType(AST.getASTContext(), SourceLocationBeg)) { // Find the corresponding decl to populate kind and fetch documentation. @@ -435,9 +434,8 @@ llvm::Optional getHover(ParsedAST &AST, Position Pos, tooling::applyAllReplacements(HI->Definition, Replacements)) HI->Definition = *Formatted; - HI->SymRange = - getTokenRange(AST.getASTContext().getSourceManager(), - AST.getASTContext().getLangOpts(), SourceLocationBeg); + HI->SymRange = getTokenRange(AST.getASTContext().getSourceManager(), + AST.getLangOpts(), SourceLocationBeg); return HI; } diff --git a/clang-tools-extra/clangd/JSONTransport.cpp b/clang-tools-extra/clangd/JSONTransport.cpp index 4921035b6dbb3..6351b8056b3fa 100644 --- a/clang-tools-extra/clangd/JSONTransport.cpp +++ b/clang-tools-extra/clangd/JSONTransport.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "Logger.h" #include "Protocol.h" // For LSPError +#include "Shutdown.h" #include "Transport.h" #include "llvm/Support/Errno.h" +#include "llvm/Support/Error.h" namespace clang { namespace clangd { @@ -81,6 +83,10 @@ class JSONTransport : public Transport { llvm::Error loop(MessageHandler &Handler) override { while (!feof(In)) { + if (shutdownRequested()) + return llvm::createStringError( + std::make_error_code(std::errc::operation_canceled), + "Got signal, shutting down"); if (ferror(In)) return llvm::errorCodeToError( std::error_code(errno, std::system_category())); @@ -167,7 +173,7 @@ bool JSONTransport::handleMessage(llvm::json::Value Message, } // Tries to read a line up to and including \n. -// If failing, feof() or ferror() will be set. +// If failing, feof(), ferror(), or shutdownRequested() will be set. bool readLine(std::FILE *In, std::string &Out) { static constexpr int BufSize = 1024; size_t Size = 0; @@ -175,7 +181,8 @@ bool readLine(std::FILE *In, std::string &Out) { for (;;) { Out.resize(Size + BufSize); // Handle EINTR which is sent when a debugger attaches on some platforms. 
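 // fgets() then fails with errno == EINTR and is simply retried, unless a
 // shutdown signal arrived in the meantime, in which case the read gives up
 // so loop() can exit.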
- if (!llvm::sys::RetryAfterSignal(nullptr, ::fgets, &Out[Size], BufSize, In)) + if (!retryAfterSignalUnlessShutdown( + nullptr, [&] { return std::fgets(&Out[Size], BufSize, In); })) return false; clearerr(In); // If the line contained null bytes, anything after it (including \n) will @@ -190,7 +197,7 @@ bool readLine(std::FILE *In, std::string &Out) { } // Returns None when: -// - ferror() or feof() are set. +// - ferror(), feof(), or shutdownRequested() are set. // - Content-Length is missing or empty (protocol error) llvm::Optional JSONTransport::readStandardMessage() { // A Language Server Protocol message starts with a set of HTTP headers, @@ -244,8 +251,9 @@ llvm::Optional JSONTransport::readStandardMessage() { std::string JSON(ContentLength, '\0'); for (size_t Pos = 0, Read; Pos < ContentLength; Pos += Read) { // Handle EINTR which is sent when a debugger attaches on some platforms. - Read = llvm::sys::RetryAfterSignal(0u, ::fread, &JSON[Pos], 1, - ContentLength - Pos, In); + Read = retryAfterSignalUnlessShutdown(0, [&]{ + return std::fread(&JSON[Pos], 1, ContentLength - Pos, In); + }); if (Read == 0) { elog("Input was aborted. Read only {0} bytes of expected {1}.", Pos, ContentLength); @@ -263,7 +271,7 @@ llvm::Optional JSONTransport::readStandardMessage() { // - messages are delimited by '---' on a line by itself // - lines starting with # are ignored. // This is a testing path, so favor simplicity over performance here. -// When returning None, feof() or ferror() will be set. +// When returning None, feof(), ferror(), or shutdownRequested() will be set. llvm::Optional JSONTransport::readDelimitedMessage() { std::string JSON; std::string Line; @@ -280,6 +288,8 @@ llvm::Optional JSONTransport::readDelimitedMessage() { JSON += Line; } + if (shutdownRequested()) + return llvm::None; if (ferror(In)) { elog("Input error while reading message!"); return llvm::None; diff --git a/clang-tools-extra/clangd/ParsedAST.h b/clang-tools-extra/clangd/ParsedAST.h index 0b4a6ab73df83..f2afc264e23a1 100644 --- a/clang-tools-extra/clangd/ParsedAST.h +++ b/clang-tools-extra/clangd/ParsedAST.h @@ -77,6 +77,10 @@ class ParsedAST { return getASTContext().getSourceManager(); } + const LangOptions &getLangOpts() const { + return getASTContext().getLangOpts(); + } + /// This function returns top-level decls present in the main file of the AST. /// The result does not include the decls that come from the preamble. /// (These should be const, but RecursiveASTVisitor requires Decl*). diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp index c91cd24e2f25f..ffa48f3a57d96 100644 --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -34,95 +34,289 @@ namespace { using Node = SelectionTree::Node; using ast_type_traits::DynTypedNode; -// Identifies which tokens are selected, and evaluates claims of source ranges -// by AST nodes. Tokens may be claimed only once: first-come, first-served. -class SelectedTokens { +// An IntervalSet maintains a set of disjoint subranges of an array. +// +// Initially, it contains the entire array. +// [-----------------------------------------------------------] +// +// When a range is erased(), it will typically split the array in two. +// Claim: [--------------------] +// after: [----------------] [-------------------] +// +// erase() returns the segments actually erased. 
Given the state above: +// Claim: [---------------------------------------] +// Out: [---------] [------] +// After: [-----] [-----------] +// +// It is used to track (expanded) tokens not yet associated with an AST node. +// On traversing an AST node, its token range is erased from the unclaimed set. +// The tokens actually removed are associated with that node, and hit-tested +// against the selection to determine whether the node is selected. +template +class IntervalSet { +public: + IntervalSet(llvm::ArrayRef Range) { UnclaimedRanges.insert(Range); } + + // Removes the elements of Claim from the set, modifying or removing ranges + // that overlap it. + // Returns the continuous subranges of Claim that were actually removed. + llvm::SmallVector, 4> erase(llvm::ArrayRef Claim) { + llvm::SmallVector, 4> Out; + if (Claim.empty()) + return Out; + + // General case: + // Claim: [-----------------] + // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-] + // Overlap: ^first ^second + // Ranges C and D are fully included. Ranges B and E must be trimmed. + auto Overlap = std::make_pair( + UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C + UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F + // Rewind to cover B. + if (Overlap.first != UnclaimedRanges.begin()) { + --Overlap.first; + // ...unless B isn't selected at all. + if (Overlap.first->end() <= Claim.begin()) + ++Overlap.first; + } + if (Overlap.first == Overlap.second) + return Out; + + // First, copy all overlapping ranges into the output. + auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second); + // If any of the overlapping ranges were sliced by the claim, split them: + // - restrict the returned range to the claimed part + // - save the unclaimed part so it can be reinserted + llvm::ArrayRef RemainingHead, RemainingTail; + if (Claim.begin() > OutFirst->begin()) { + RemainingHead = {OutFirst->begin(), Claim.begin()}; + *OutFirst = {Claim.begin(), OutFirst->end()}; + } + if (Claim.end() < Out.back().end()) { + RemainingTail = {Claim.end(), Out.back().end()}; + Out.back() = {Out.back().begin(), Claim.end()}; + } + + // Erase all the overlapping ranges (invalidating all iterators). + UnclaimedRanges.erase(Overlap.first, Overlap.second); + // Reinsert ranges that were merely trimmed. + if (!RemainingHead.empty()) + UnclaimedRanges.insert(RemainingHead); + if (!RemainingTail.empty()) + UnclaimedRanges.insert(RemainingTail); + + return Out; + } + +private: + using TokenRange = llvm::ArrayRef; + struct RangeLess { + bool operator()(llvm::ArrayRef L, llvm::ArrayRef R) const { + return L.begin() < R.begin(); + } + }; + + // Disjoint sorted unclaimed ranges of expanded tokens. + std::set, RangeLess> + UnclaimedRanges; +}; + +// Sentinel value for the selectedness of a node where we've seen no tokens yet. +// This resolves to Unselected if no tokens are ever seen. +// But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete. +// This value is never exposed publicly. +constexpr SelectionTree::Selection NoTokens = + static_cast( + static_cast(SelectionTree::Complete + 1)); + +// Nodes start with NoTokens, and then use this function to aggregate the +// selectedness as more tokens are found. +void update(SelectionTree::Selection &Result, SelectionTree::Selection New) { + if (New == NoTokens) + return; + if (Result == NoTokens) + Result = New; + else if (Result != New) + // Can only be completely selected (or unselected) if all tokens are. 
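+  // e.g. a Complete token followed by an Unselected one within the same
+  // node makes that node Partial.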
+ Result = SelectionTree::Partial; +} + + +// SelectionTester can determine whether a range of tokens from the PP-expanded +// stream (corresponding to an AST node) is considered selected. +// +// When the tokens result from macro expansions, the appropriate tokens in the +// main file are examined (macro invocation or args). Similarly for #includes. +// +// It tests each token in the range (not just the endpoints) as contiguous +// expanded tokens may not have contiguous spellings (with macros). +// +// Non-token text, and tokens not modeled in the AST (comments, semicolons) +// are ignored when determining selectedness. +class SelectionTester { public: - SelectedTokens(llvm::ArrayRef Spelled, const SourceManager &SM, - unsigned SelBegin, unsigned SelEnd) - : SelBegin(SelBegin), SelEnd(SelEnd) { - // Extract bounds and selected-ness for all tokens spelled in the file. - Tokens.reserve(Spelled.size()); - for (const auto& Tok : Spelled) { + // The selection is offsets [SelBegin, SelEnd) in SelFile. + SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile, + unsigned SelBegin, unsigned SelEnd, const SourceManager &SM) + : SelFile(SelFile), SM(SM) { + // Find all tokens (partially) selected in the file. + auto AllSpelledTokens = Buf.spelledTokens(SelFile); + const syntax::Token *SelFirst = + llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) { + return SM.getFileOffset(Tok.endLocation()) <= SelBegin; + }); + const syntax::Token *SelLimit = std::partition_point( + SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) { + return SM.getFileOffset(Tok.location()) < SelEnd; + }); + // Precompute selectedness and offset for selected spelled tokens. + for (const syntax::Token *T = SelFirst; T < SelLimit; ++T) { // As well as comments, don't count semicolons as real tokens. // They're not properly claimed as expr-statement is missing from the AST. - if (Tok.kind() == tok::comment || Tok.kind() == tok::semi) + if (T->kind() == tok::comment || T->kind() == tok::semi) continue; - - Tokens.emplace_back(); - TokInfo &S = Tokens.back(); - S.StartOffset = SM.getFileOffset(Tok.location()); - S.EndOffset = S.StartOffset + Tok.length(); - if (S.StartOffset >= SelBegin && S.EndOffset <= SelEnd) + SpelledTokens.emplace_back(); + Tok &S = SpelledTokens.back(); + S.Offset = SM.getFileOffset(T->location()); + if (S.Offset >= SelBegin && S.Offset + T->length() <= SelEnd) S.Selected = SelectionTree::Complete; - else if (S.EndOffset > SelBegin && S.StartOffset < SelEnd) - S.Selected = SelectionTree::Partial; else - S.Selected = SelectionTree::Unselected; - S.Claimed = false; + S.Selected = SelectionTree::Partial; } } - // Associates any tokens overlapping [Begin, End) with an AST node. - // Tokens that were already claimed by another AST node are not claimed again. - // Updates Result if the node is selected in the sense of SelectionTree. - void claim(unsigned Begin, unsigned End, SelectionTree::Selection &Result) { - assert(Begin <= End); + // Test whether a consecutive range of tokens is selected. + // The tokens are taken from the expanded token stream. + SelectionTree::Selection + test(llvm::ArrayRef ExpandedTokens) const { + if (SpelledTokens.empty()) + return NoTokens; + SelectionTree::Selection Result = NoTokens; + while (!ExpandedTokens.empty()) { + // Take consecutive tokens from the same context together for efficiency. 
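+      // e.g. for an expanded stream a b | c d | e (bars mark FileID changes),
+      // we hit-test the batches [a b], [c d], [e] rather than single tokens.
+      // (Illustrative only; the real batch boundaries come from getFileID
+      // below.)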
+ FileID FID = SM.getFileID(ExpandedTokens.front().location()); + auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) { + return SM.getFileID(T.location()) == FID; + }); + assert(!Batch.empty()); + ExpandedTokens = ExpandedTokens.drop_front(Batch.size()); + + update(Result, testChunk(FID, Batch)); + } + return Result; + } - // Fast-path for missing the selection entirely. - if (Begin >= SelEnd || End <= SelBegin) - return; - - // We will consider the range (at least partially) selected if it hit any - // selected and previously unclaimed token. - bool ClaimedAnyToken = false; - // The selection is (at most) partial if: - // - any claimed token is partially selected - // - any token in the range is unselected - bool PartialSelection = false; - - // Find the first token that (maybe) overlaps the claimed range. - auto Start = llvm::partition_point(Tokens, [&](const TokInfo &Tok) { - return Tok.EndOffset <= Begin; - }); - // Iterate over every token that overlaps the range. - // Claim selected tokens, and update the two result flags. - for (auto It = Start; It != Tokens.end() && It->StartOffset < End; ++It) { - if (It->Selected) { - if (!It->Claimed) { - // Token is selected, in the node's range, and unclaimed; claim it. - It->Claimed = true; - ClaimedAnyToken = true; - // If the token was only partially selected, so is the node. - PartialSelection |= (It->Selected == SelectionTree::Partial); - } - } else { - // If the node covers an unselected token, it's not completely selected. - PartialSelection = true; + // Cheap check whether any of the tokens in R might be selected. + // If it returns false, test() will return NoTokens or Unselected. + // If it returns true, test() may return any value. + bool mayHit(SourceRange R) const { + if (SpelledTokens.empty()) + return false; + auto B = SM.getDecomposedLoc(R.getBegin()); + auto E = SM.getDecomposedLoc(R.getEnd()); + if (B.first == SelFile && E.first == SelFile) + if (E.second < SpelledTokens.front().Offset || + B.second > SpelledTokens.back().Offset) + return false; + return true; + } + +private: + // Hit-test a consecutive range of tokens from a single file ID. + SelectionTree::Selection + testChunk(FileID FID, llvm::ArrayRef Batch) const { + assert(!Batch.empty()); + SourceLocation StartLoc = Batch.front().location(); + // There are several possible categories of FileID depending on how the + // preprocessor was used to generate these tokens: + // main file, #included file, macro args, macro bodies. + // We need to identify the main-file tokens that represent Batch, and + // determine whether we want to exclusively claim them. Regular tokens + // represent one AST construct, but a macro invocation can represent many. + + // Handle tokens written directly in the main file. + if (FID == SelFile) { + return testTokenRange(SM.getFileOffset(Batch.front().location()), + SM.getFileOffset(Batch.back().location())); + } + + // Handle tokens in another file #included into the main file. + // Check if the #include is selected, but don't claim it exclusively. + if (StartLoc.isFileID()) { + for (SourceLocation Loc = Batch.front().location(); Loc.isValid(); + Loc = SM.getIncludeLoc(SM.getFileID(Loc))) { + if (SM.getFileID(Loc) == SelFile) + // FIXME: use whole #include directive, not just the filename string. + return testToken(SM.getFileOffset(Loc)); } + return NoTokens; } - // If some tokens were previously claimed (Result != Unselected), we may - // upgrade from Partial->Complete, even if no new tokens were claimed. 
- // Important for [[int a]]. - if (ClaimedAnyToken || Result) { - Result = std::max(Result, PartialSelection ? SelectionTree::Partial - : SelectionTree::Complete); + assert(StartLoc.isMacroID()); + // Handle tokens that were passed as a macro argument. + SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc); + if (SM.getFileID(ArgStart) == SelFile) { + SourceLocation ArgEnd = SM.getTopMacroCallerLoc(Batch.back().location()); + return testTokenRange(SM.getFileOffset(ArgStart), + SM.getFileOffset(ArgEnd)); } + + // Handle tokens produced by non-argument macro expansion. + // Check if the macro name is selected, don't claim it exclusively. + auto Expansion = SM.getDecomposedExpansionLoc(StartLoc); + if (Expansion.first == SelFile) + // FIXME: also check ( and ) for function-like macros? + return testToken(Expansion.second); + else + return NoTokens; } -private: - struct TokInfo { - unsigned StartOffset; - unsigned EndOffset; + // Is the closed token range [Begin, End] selected? + SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const { + assert(Begin <= End); + // Outside the selection entirely? + if (End < SpelledTokens.front().Offset || + Begin > SpelledTokens.back().Offset) + return SelectionTree::Unselected; + + // Compute range of tokens. + auto B = llvm::partition_point( + SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; }); + auto E = std::partition_point( + B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; }); + + // Aggregate selectedness of tokens in range. + bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset || + End > SpelledTokens.back().Offset; + SelectionTree::Selection Result = + ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens; + for (auto It = B; It != E; ++It) + update(Result, It->Selected); + return Result; + } + + // Is the token at `Offset` selected? + SelectionTree::Selection testToken(unsigned Offset) const { + // Outside the selection entirely? + if (Offset < SpelledTokens.front().Offset || + Offset > SpelledTokens.back().Offset) + return SelectionTree::Unselected; + // Find the token, if it exists. + auto It = llvm::partition_point( + SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; }); + if (It != SpelledTokens.end() && It->Offset == Offset) + return It->Selected; + return NoTokens; + } + + struct Tok { + unsigned Offset; SelectionTree::Selection Selected; - bool Claimed; - bool operator<(const TokInfo &Other) const { - return StartOffset < Other.StartOffset; - } }; - std::vector Tokens; - unsigned SelBegin, SelEnd; + std::vector SpelledTokens; + FileID SelFile; + const SourceManager &SM; }; // Show the type of a node for debugging. @@ -195,16 +389,6 @@ class SelectionVisitor : public RecursiveASTVisitor { V.TraverseAST(AST); assert(V.Stack.size() == 1 && "Unpaired push/pop?"); assert(V.Stack.top() == &V.Nodes.front()); - // We selected TUDecl if tokens were unclaimed (or the file is empty). - SelectionTree::Selection UnclaimedTokens = SelectionTree::Unselected; - V.Claimed.claim(Begin, End, UnclaimedTokens); - if (UnclaimedTokens || V.Nodes.size() == 1) { - StringRef FileContent = AST.getSourceManager().getBufferData(File); - // Don't require the trailing newlines to be selected. - bool SelectedAll = Begin == 0 && End >= FileContent.rtrim().size(); - V.Stack.top()->Selected = - SelectedAll ? 
SelectionTree::Complete : SelectionTree::Partial; - } return std::move(V.Nodes); } @@ -289,11 +473,8 @@ class SelectionVisitor : public RecursiveASTVisitor { #ifndef NDEBUG PrintPolicy(PP), #endif - Claimed(Tokens.spelledTokens(SelFile), SM, SelBegin, SelEnd), - SelFile(SelFile), - SelBeginTokenStart(SM.getFileOffset(Lexer::GetBeginningOfToken( - SM.getComposedLoc(SelFile, SelBegin), SM, LangOpts))), - SelEnd(SelEnd) { + TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM), + UnclaimedExpandedTokens(Tokens.expandedTokens()) { // Ensure we have a node for the TU decl, regardless of traversal scope. Nodes.emplace_back(); Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl()); @@ -346,18 +527,12 @@ class SelectionVisitor : public RecursiveASTVisitor { // don't intersect the selection may be recursively skipped. bool canSafelySkipNode(const DynTypedNode &N) { SourceRange S = N.getSourceRange(); - auto B = SM.getDecomposedLoc(S.getBegin()); - auto E = SM.getDecomposedLoc(S.getEnd()); - // Node lies in a macro expansion? - if (B.first != SelFile || E.first != SelFile) - return false; - // Node intersects selection tokens? - if (B.second < SelEnd && E.second >= SelBeginTokenStart) - return false; - // Otherwise, allow skipping over the node. - dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent()); - dlog("{1}skipped range = {0}", S.printToString(SM), indent(1)); - return true; + if (!SelChecker.mayHit(S)) { + dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent()); + dlog("{1}skipped range = {0}", S.printToString(SM), indent(1)); + return true; + } + return false; } // There are certain nodes we want to treat as leaves in the SelectionTree, @@ -377,11 +552,9 @@ class SelectionVisitor : public RecursiveASTVisitor { Nodes.emplace_back(); Nodes.back().ASTNode = std::move(Node); Nodes.back().Parent = Stack.top(); + Nodes.back().Selected = NoTokens; Stack.push(&Nodes.back()); claimRange(Early, Nodes.back().Selected); - // Early hit detection never selects the whole node. - if (Nodes.back().Selected) - Nodes.back().Selected = SelectionTree::Partial; } // Pops a node off the ancestor stack, and finalizes it. Pairs with push(). @@ -390,6 +563,8 @@ class SelectionVisitor : public RecursiveASTVisitor { Node &N = *Stack.top(); dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1)); claimRange(N.ASTNode.getSourceRange(), N.Selected); + if (N.Selected == NoTokens) + N.Selected = SelectionTree::Unselected; if (N.Selected || !N.Children.empty()) { // Attach to the tree. N.Parent->Children.push_back(&N); @@ -424,31 +599,12 @@ class SelectionVisitor : public RecursiveASTVisitor { // This is usually called from pop(), so we can take children into account. // The existing state of Result is relevant (early/late claims can interact). void claimRange(SourceRange S, SelectionTree::Selection &Result) { - if (!S.isValid()) - return; - // toHalfOpenFileRange() allows selection of constructs in macro args. e.g: - // #define LOOP_FOREVER(Body) for(;;) { Body } - // void IncrementLots(int &x) { - // LOOP_FOREVER( ++x; ) - // } - // Selecting "++x" or "x" will do the right thing. - auto Range = toHalfOpenFileRange(SM, LangOpts, S); - assert(Range && "We should be able to get the File Range"); - dlog("{1}claimRange: {0}", Range->printToString(SM), indent()); - auto B = SM.getDecomposedLoc(Range->getBegin()); - auto E = SM.getDecomposedLoc(Range->getEnd()); - // Otherwise, nodes in macro expansions can't be selected. 
- if (B.first != SelFile || E.first != SelFile) - return; - // Attempt to claim the remaining range. If there's nothing to claim, only - // children were selected. - Claimed.claim(B.second, E.second, Result); - if (Result) - dlog("{1}hit selection: {0}", - SourceRange(SM.getComposedLoc(B.first, B.second), - SM.getComposedLoc(E.first, E.second)) - .printToString(SM), - indent()); + for (const auto &ClaimedRange : + UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S))) + update(Result, SelChecker.test(ClaimedRange)); + + if (Result && Result != NoTokens) + dlog("{1}hit selection: {0}", S.printToString(SM), indent()); } std::string indent(int Offset = 0) { @@ -463,17 +619,11 @@ class SelectionVisitor : public RecursiveASTVisitor { #ifndef NDEBUG const PrintingPolicy &PrintPolicy; #endif + const syntax::TokenBuffer &TokenBuf; std::stack Stack; - SelectedTokens Claimed; + SelectionTester SelChecker; + IntervalSet UnclaimedExpandedTokens; std::deque Nodes; // Stable pointers as we add more nodes. - FileID SelFile; - // If the selection start slices a token in half, the beginning of that token. - // This is useful for checking whether the end of a token range overlaps - // the selection: range.end < SelBeginTokenStart is equivalent to - // range.end + measureToken(range.end) < SelBegin (assuming range.end points - // to a token), and it saves a lex every time. - unsigned SelBeginTokenStart; - unsigned SelEnd; }; } // namespace @@ -513,8 +663,9 @@ static std::pair pointBounds(unsigned Offset, FileID FID, return {Offset - 1, Offset}; // We could choose either this byte or the previous. Usually we prefer the // character on the right of the cursor (or under a block cursor). - // But if that's whitespace, we likely want the token on the left. - if (isWhitespace(Buf[Offset]) && !isWhitespace(Buf[Offset - 1])) + // But if that's whitespace/semicolon, we likely want the token on the left. + auto IsIgnoredChar = [](char C) { return isWhitespace(C) || C == ';'; }; + if (IsIgnoredChar(Buf[Offset]) && !IsIgnoredChar(Buf[Offset - 1])) return {Offset - 1, Offset}; return {Offset, Offset + 1}; } diff --git a/clang-tools-extra/clangd/Selection.h b/clang-tools-extra/clangd/Selection.h index 9bcb9d5fb01f0..a7050c49be6ba 100644 --- a/clang-tools-extra/clangd/Selection.h +++ b/clang-tools-extra/clangd/Selection.h @@ -76,7 +76,7 @@ class SelectionTree { unsigned Start, unsigned End); // Describes to what extent an AST node is covered by the selection. - enum Selection { + enum Selection : unsigned char { // The AST node owns no characters covered by the selection. 
// Note that characters owned by children don't count: // if (x == 0) scream(); diff --git a/clang-tools-extra/clangd/SemanticSelection.cpp b/clang-tools-extra/clangd/SemanticSelection.cpp index 91a5582ac29a4..cbbf31f1b05b5 100644 --- a/clang-tools-extra/clangd/SemanticSelection.cpp +++ b/clang-tools-extra/clangd/SemanticSelection.cpp @@ -30,7 +30,7 @@ llvm::Expected> getSemanticRanges(ParsedAST &AST, Position Pos) { std::vector Result; const auto &SM = AST.getSourceManager(); - const auto &LangOpts = AST.getASTContext().getLangOpts(); + const auto &LangOpts = AST.getLangOpts(); auto FID = SM.getMainFileID(); auto Offset = positionToOffset(SM.getBufferData(FID), Pos); diff --git a/clang-tools-extra/clangd/Shutdown.cpp b/clang-tools-extra/clangd/Shutdown.cpp new file mode 100644 index 0000000000000..dfea46d8dfeb8 --- /dev/null +++ b/clang-tools-extra/clangd/Shutdown.cpp @@ -0,0 +1,39 @@ +//===--- Shutdown.cpp - Unclean exit scenarios ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Shutdown.h" + +#include +#include + +namespace clang { +namespace clangd { + +void abortAfterTimeout(std::chrono::seconds Timeout) { + // This is more portable than sys::WatchDog, and yields a stack trace. + std::thread([Timeout] { + std::this_thread::sleep_for(Timeout); + std::abort(); + }).detach(); +} + +static std::atomic ShutdownRequested = {false}; + +void requestShutdown() { + if (ShutdownRequested.exchange(true)) + // This is the second shutdown request. Exit hard. + std::abort(); +} + +bool shutdownRequested() { + return ShutdownRequested; +} + +} // namespace clangd +} // namespace clang + diff --git a/clang-tools-extra/clangd/Shutdown.h b/clang-tools-extra/clangd/Shutdown.h new file mode 100644 index 0000000000000..3097f6a3e63c7 --- /dev/null +++ b/clang-tools-extra/clangd/Shutdown.h @@ -0,0 +1,84 @@ +//===--- Shutdown.h - Unclean exit scenarios --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// LSP specifies a protocol for shutting down: a `shutdown` request followed +// by an `exit` notification. If this protocol is followed, clangd should +// finish outstanding work and exit with code 0. +// +// The way this works in the happy case: +// - when ClangdLSPServer gets `shutdown`, it sets a flag +// - when ClangdLSPServer gets `exit`, it returns false to indicate end-of-LSP +// - Transport::loop() returns with no error +// - ClangdServer::run() checks the shutdown flag and returns with no error. +// - we `return 0` from main() +// - destructor of ClangdServer and other main()-locals runs. +// This blocks until outstanding requests complete (results are ignored) +// - global destructors run, such as fallback deletion of temporary files +// +// There are a number of things that can go wrong. Some are handled here, and +// some elsewhere. +// - `exit` notification with no `shutdown`: +// ClangdServer::run() sees this and returns false, main() returns nonzero. 
+// - stdin/stdout are closed
+//   The Transport detects this while doing IO and returns an error from loop()
+//   ClangdServer::run() logs a message and then returns false, etc
+// - a request thread gets stuck, so the ClangdServer destructor hangs.
+//   Before returning from main(), we start a watchdog thread to abort() the
+//   process if it takes too long to exit. See abortAfterTimeout().
+// - clangd crashes (e.g. segfault or assertion)
+//   A fatal signal is sent (SEGV, ABRT, etc)
+//   The installed signal handler prints a stack trace and exits.
+// - parent process goes away or tells us to shut down
+//   A "graceful shutdown" signal is sent (TERM, HUP, etc).
+//   The installed signal handler calls requestShutdown() which sets a flag.
+//   The Transport IO is interrupted, and Transport::loop() checks the flag and
+//   returns an error, etc.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SHUTDOWN_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SHUTDOWN_H
+
+#include <cerrno>
+#include <chrono>
+
+namespace clang {
+namespace clangd {
+
+/// Causes this process to crash if still running after Timeout.
+void abortAfterTimeout(std::chrono::seconds Timeout);
+
+/// Sets a flag to indicate that clangd was sent a shutdown signal, and the
+/// transport loop should exit at the next opportunity.
+/// If shutdown was already requested, aborts the process.
+/// This function is threadsafe and signal-safe.
+void requestShutdown();
+/// Checks whether requestShutdown() was called.
+/// This function is threadsafe and signal-safe.
+bool shutdownRequested();
+
+/// Retry an operation if it gets interrupted by a signal.
+/// This is like llvm::sys::RetryAfterSignal, except that if shutdown was
+/// requested (which interrupts IO), we'll fail rather than retry.
+template <typename Fun, typename Ret = decltype(std::declval<Fun>()())>
+Ret retryAfterSignalUnlessShutdown(
+    const typename std::enable_if<true, Ret>::type &Fail, // Suppress deduction.
+    const Fun &F) {
+  Ret Res;
+  do {
+    if (shutdownRequested())
+      return Fail;
+    errno = 0;
+    Res = F();
+  } while (Res == Fail && errno == EINTR);
+  return Res;
+}
+
+} // namespace clangd
+} // namespace clang
+
+#endif
diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h
index 3b8aacef9bf17..f75be998dc2d4 100644
--- a/clang-tools-extra/clangd/SourceCode.h
+++ b/clang-tools-extra/clangd/SourceCode.h
@@ -223,6 +223,9 @@ struct Edit {
   /// Checks whether the Replacements are applicable to given Code.
   bool canApplyTo(llvm::StringRef Code) const;
 };
+/// A mapping from absolute file path (the one used for accessing the
+/// underlying VFS) to edits.
+using FileEdits = llvm::StringMap<Edit>;
 
 /// Formats the edits and code around it according to Style. Changes
 /// Replacements to formatted ones if succeeds.
diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp
index 6436e7a50c615..884c82d5b1909 100644
--- a/clang-tools-extra/clangd/TUScheduler.cpp
+++ b/clang-tools-extra/clangd/TUScheduler.cpp
@@ -407,8 +407,12 @@ void ASTWorker::update(ParseInputs Inputs, WantDiagnostics WantDiags) {
         llvm::join(Inputs.CompileCommand.CommandLine, " "));
     // Rebuild the preamble and the AST.
     StoreDiags CompilerInvocationDiagConsumer;
-    std::unique_ptr<CompilerInvocation> Invocation =
-        buildCompilerInvocation(Inputs, CompilerInvocationDiagConsumer);
+    std::vector<std::string> CC1Args;
+    std::unique_ptr<CompilerInvocation> Invocation = buildCompilerInvocation(
+        Inputs, CompilerInvocationDiagConsumer, &CC1Args);
+    // Log cc1 args even (especially!) if creating invocation failed.
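+    // (Illustrative: even when Invocation is null, the verbose log may still
+    // show a line like "Driver produced command: cc1 -fsyntax-only ...",
+    // which is exactly the failure case where it is most useful.)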
+ if (!CC1Args.empty()) + vlog("Driver produced command: cc1 {0}", llvm::join(CC1Args, " ")); std::vector CompilerInvocationDiags = CompilerInvocationDiagConsumer.take(); if (!Invocation) { @@ -916,6 +920,13 @@ llvm::StringRef TUScheduler::getContents(PathRef File) const { return It->second->Contents; } +llvm::StringMap TUScheduler::getAllFileContents() const { + llvm::StringMap Results; + for (auto &It : Files) + Results.try_emplace(It.getKey(), It.getValue()->Contents); + return Results; +} + void TUScheduler::run(llvm::StringRef Name, llvm::unique_function Action) { if (!PreambleTasks) diff --git a/clang-tools-extra/clangd/TUScheduler.h b/clang-tools-extra/clangd/TUScheduler.h index ff2d4d485047f..de3b895499831 100644 --- a/clang-tools-extra/clangd/TUScheduler.h +++ b/clang-tools-extra/clangd/TUScheduler.h @@ -180,6 +180,9 @@ class TUScheduler { /// The returned StringRef may be invalidated by any write to TUScheduler. llvm::StringRef getContents(PathRef File) const; + /// Returns a snapshot of all file buffer contents, per last update(). + llvm::StringMap getAllFileContents() const; + /// Schedule an async task with no dependencies. void run(llvm::StringRef Name, llvm::unique_function Action); diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index de10e3c48e202..8bcc268d1b187 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -191,9 +191,8 @@ std::vector locateSymbolAt(ParsedAST &AST, Position Pos, // Macros are simple: there's no declaration/definition distinction. // As a consequence, there's no need to look them up in the index either. - SourceLocation MaybeMacroLocation = - SM.getMacroArgExpandedLocation(getBeginningOfIdentifier( - Pos, AST.getSourceManager(), AST.getASTContext().getLangOpts())); + SourceLocation MaybeMacroLocation = SM.getMacroArgExpandedLocation( + getBeginningOfIdentifier(Pos, AST.getSourceManager(), AST.getLangOpts())); std::vector Result; if (auto M = locateMacroAt(MaybeMacroLocation, AST.getPreprocessor())) { if (auto Loc = makeLocation(AST.getASTContext(), @@ -366,7 +365,7 @@ std::vector findDocumentHighlights(ParsedAST &AST, auto References = findRefs( getDeclAtPosition(AST, SM.getMacroArgExpandedLocation(getBeginningOfIdentifier( - Pos, SM, AST.getASTContext().getLangOpts())), + Pos, SM, AST.getLangOpts())), Relations), AST); @@ -374,9 +373,8 @@ std::vector findDocumentHighlights(ParsedAST &AST, // different kinds, deduplicate them. std::vector Result; for (const auto &Ref : References) { - if (auto Range = - getTokenRange(AST.getASTContext().getSourceManager(), - AST.getASTContext().getLangOpts(), Ref.Loc)) { + if (auto Range = getTokenRange(AST.getASTContext().getSourceManager(), + AST.getLangOpts(), Ref.Loc)) { DocumentHighlight DH; DH.range = *Range; if (Ref.Role & index::SymbolRoleSet(index::SymbolRole::Write)) @@ -404,7 +402,7 @@ ReferencesResult findReferences(ParsedAST &AST, Position Pos, uint32_t Limit, return Results; } auto Loc = SM.getMacroArgExpandedLocation( - getBeginningOfIdentifier(Pos, SM, AST.getASTContext().getLangOpts())); + getBeginningOfIdentifier(Pos, SM, AST.getLangOpts())); // TODO: should we handle macros, too? // We also show references to the targets of using-decls, so we include // DeclRelation::Underlying. 
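The using-declaration case is easiest to see with a concrete snippet. The following is purely illustrative user code (not part of this patch), showing why the lookup includes `DeclRelation::Underlying`:

```cpp
namespace detail { void foo(); } // the underlying declaration
using detail::foo;               // the using-declaration re-exports it

void caller() {
  foo(); // find-references here should also report detail::foo, which is
         // what including DeclRelation::Underlying in the lookup achieves.
}
```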
@@ -424,8 +422,7 @@ ReferencesResult findReferences(ParsedAST &AST, Position Pos, uint32_t Limit, }), MainFileRefs.end()); for (const auto &Ref : MainFileRefs) { - if (auto Range = - getTokenRange(SM, AST.getASTContext().getLangOpts(), Ref.Loc)) { + if (auto Range = getTokenRange(SM, AST.getLangOpts(), Ref.Loc)) { Location Result; Result.range = *Range; Result.uri = URIForFile::canonicalize(*MainFilePath, *MainFilePath); @@ -470,7 +467,7 @@ ReferencesResult findReferences(ParsedAST &AST, Position Pos, uint32_t Limit, std::vector getSymbolInfo(ParsedAST &AST, Position Pos) { const SourceManager &SM = AST.getSourceManager(); auto Loc = SM.getMacroArgExpandedLocation( - getBeginningOfIdentifier(Pos, SM, AST.getASTContext().getLangOpts())); + getBeginningOfIdentifier(Pos, SM, AST.getLangOpts())); std::vector Results; @@ -646,7 +643,7 @@ static void fillSuperTypes(const CXXRecordDecl &CXXRD, ASTContext &ASTCtx, const CXXRecordDecl *findRecordTypeAt(ParsedAST &AST, Position Pos) { const SourceManager &SM = AST.getSourceManager(); SourceLocation SourceLocationBeg = SM.getMacroArgExpandedLocation( - getBeginningOfIdentifier(Pos, SM, AST.getASTContext().getLangOpts())); + getBeginningOfIdentifier(Pos, SM, AST.getLangOpts())); DeclRelationSet Relations = DeclRelation::TemplatePattern | DeclRelation::Underlying; auto Decls = getDeclAtPosition(AST, SourceLocationBeg, Relations); diff --git a/clang-tools-extra/clangd/clients/clangd-vscode/package.json b/clang-tools-extra/clangd/clients/clangd-vscode/package.json index 05aafeb5f850c..8abf7e743e6f2 100644 --- a/clang-tools-extra/clangd/clients/clangd-vscode/package.json +++ b/clang-tools-extra/clangd/clients/clangd-vscode/package.json @@ -23,6 +23,7 @@ "activationEvents": [ "onLanguage:c", "onLanguage:cpp", + "onLanguage:cuda", "onLanguage:objective-c", "onLanguage:objective-cpp", "onCommand:clangd-vscode.activate" @@ -64,6 +65,13 @@ "**/MSVC/*/include/**" ], "firstLine": "^/[/*].*-\\*-\\s*C\\+\\+\\s*-\\*-.*" + }, + { + "id": "cuda", + "extensions": [ + ".cu", + ".cuh" + ] } ], "configuration": { diff --git a/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts b/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts index 330cf7ac262eb..1f96cffef2559 100644 --- a/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts +++ b/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts @@ -83,21 +83,15 @@ export function activate(context: vscode.ExtensionContext) { } const serverOptions: vscodelc.ServerOptions = clangd; - // Note that CUDA ('.cu') files are special. When opening files of all other - // extensions, VSCode would load clangd automatically. This is achieved by - // having a corresponding 'onLanguage:...' activation event in package.json. - // However, VSCode does not have CUDA as a supported language yet, so we - // cannot add a corresponding activationEvent for CUDA files and clangd will - // *not* load itself automatically on '.cu' files. - const cudaFilePattern: string = '**/*.{' + [ 'cu' ].join() + '}'; const clientOptions: vscodelc.LanguageClientOptions = { // Register the server for c-family and cuda files. documentSelector: [ { scheme: 'file', language: 'c' }, { scheme: 'file', language: 'cpp' }, + // cuda is not supported by vscode, but our extension does. 
+            { scheme: 'file', language: 'cuda' },
             { scheme: 'file', language: 'objective-c'},
-            { scheme: 'file', language: 'objective-cpp'},
-            { scheme: 'file', pattern: cudaFilePattern },
+            { scheme: 'file', language: 'objective-cpp'}
         ],
         synchronize: !syncFileEvents ? undefined : {
             // FIXME: send sync file events when clangd provides implementations.
@@ -111,10 +105,10 @@ export function activate(context: vscode.ExtensionContext) {
         serverOptions, clientOptions);
     if (getConfig('semanticHighlighting')) {
       const semanticHighlightingFeature =
-          new semanticHighlighting.SemanticHighlightingFeature(clangdClient,
-                                                               context);
+        new semanticHighlighting.SemanticHighlightingFeature(clangdClient,
+                                                             context);
       context.subscriptions.push(
-          vscode.Disposable.from(semanticHighlightingFeature));
+        vscode.Disposable.from(semanticHighlightingFeature));
       clangdClient.registerFeature(semanticHighlightingFeature);
     }
     console.log('Clang Language Server is now active!');
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index 00adbd84fd62f..191cd68ccb29e 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -16,6 +16,7 @@
 #include "SourceCode.h"
 #include "SymbolLocation.h"
 #include "URI.h"
+#include "index/SymbolID.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclCXX.h"
@@ -345,43 +346,52 @@ bool SymbolCollector::handleMacroOccurence(const IdentifierInfo *Name,
                                            const MacroInfo *MI,
                                            index::SymbolRoleSet Roles,
                                            SourceLocation Loc) {
-  if (!Opts.CollectMacro)
-    return true;
   assert(PP.get());
 
   const auto &SM = PP->getSourceManager();
   auto DefLoc = MI->getDefinitionLoc();
+  auto SpellingLoc = SM.getSpellingLoc(Loc);
+  bool IsMainFileSymbol = SM.isInMainFile(SM.getExpansionLoc(DefLoc));
 
   // Builtin macros don't have useful locations and aren't needed in completion.
   if (MI->isBuiltinMacro())
     return true;
 
-  // Skip main-file symbols if we are not collecting them.
-  bool IsMainFileSymbol = SM.isInMainFile(SM.getExpansionLoc(DefLoc));
-  if (IsMainFileSymbol && !Opts.CollectMainFileSymbols)
-    return false;
-
   // Also avoid storing predefined macros like __DBL_MIN__.
   if (SM.isWrittenInBuiltinFile(DefLoc))
     return true;
 
+  auto ID = getSymbolID(Name->getName(), MI, SM);
+  if (!ID)
+    return true;
+
+  // Do not store references to main-file macros.
+  if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileSymbol &&
+      (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID()))
+    MacroRefs[*ID].push_back({Loc, Roles});
+
+  // Collect symbols.
+  if (!Opts.CollectMacro)
+    return true;
+
+  // Skip main-file macros if we are not collecting them.
+  if (IsMainFileSymbol && !Opts.CollectMainFileSymbols)
+    return false;
+
   // Mark the macro as referenced if this is a reference coming from the main
   // file. The macro may not be an interesting symbol, but it's cheaper to check
   // at the end.
   if (Opts.CountReferences &&
       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
-      SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
+      SM.getFileID(SpellingLoc) == SM.getMainFileID())
     ReferencedMacros.insert(Name);
+
+  // Don't continue indexing if this is a mere reference.
   // FIXME: remove macro with ID if it is undefined.
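+  // (e.g. a plain `#ifdef FOO` mentions FOO with only a Reference role; the
+  //  occurrence was recorded above, but it neither declares nor defines the
+  //  macro, so there is no symbol to collect. Illustrative example.)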
   if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
         Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
     return true;
 
-  auto ID = getSymbolID(Name->getName(), MI, SM);
-  if (!ID)
-    return true;
-
   // Only collect one instance in case there are multiple.
   if (Symbols.find(*ID) != nullptr)
     return true;
@@ -485,10 +495,10 @@ void SymbolCollector::finish() {
         IncRef(*ID);
       }
   }
-
   // Fill in IncludeHeaders.
   // We delay this until end of TU so header guards are all resolved.
-  // Symbols in slabs aren't mutable, so insert() has to walk all the strings :-(
+  // Symbols in slabs aren't mutable, so insert() has to walk all the strings
+  // :-(
   llvm::SmallString<256> QName;
   for (const auto &Entry : IncludeFiles)
     if (const Symbol *S = Symbols.find(Entry.first)) {
@@ -518,25 +528,34 @@ void SymbolCollector::finish() {
       }
       return Found->second;
     };
+    auto CollectRef =
+        [&](SymbolID ID,
+            const std::pair<SourceLocation, index::SymbolRoleSet> &LocAndRole) {
+          auto FileID = SM.getFileID(LocAndRole.first);
+          // FIXME: use the result to filter out references.
+          shouldIndexFile(FileID);
+          if (auto FileURI = GetURI(FileID)) {
+            auto Range =
+                getTokenRange(LocAndRole.first, SM, ASTCtx->getLangOpts());
+            Ref R;
+            R.Location.Start = Range.first;
+            R.Location.End = Range.second;
+            R.Location.FileURI = FileURI->c_str();
+            R.Kind = toRefKind(LocAndRole.second);
+            Refs.insert(ID, R);
+          }
+        };
+    // Populate Refs slab from MacroRefs.
+    for (const auto &IDAndRefs : MacroRefs) {
+      for (const auto &LocAndRole : IDAndRefs.second)
+        CollectRef(IDAndRefs.first, LocAndRole);
+    }
     // Populate Refs slab from DeclRefs.
     if (auto MainFileURI = GetURI(SM.getMainFileID())) {
       for (const auto &It : DeclRefs) {
         if (auto ID = getSymbolID(It.first)) {
-          for (const auto &LocAndRole : It.second) {
-            auto FileID = SM.getFileID(LocAndRole.first);
-            // FIXME: use the result to filter out references.
-            shouldIndexFile(FileID);
-            if (auto FileURI = GetURI(FileID)) {
-              auto Range =
-                  getTokenRange(LocAndRole.first, SM, ASTCtx->getLangOpts());
-              Ref R;
-              R.Location.Start = Range.first;
-              R.Location.End = Range.second;
-              R.Location.FileURI = FileURI->c_str();
-              R.Kind = toRefKind(LocAndRole.second);
-              Refs.insert(*ID, R);
-            }
-          }
+          for (const auto &LocAndRole : It.second)
+            CollectRef(*ID, LocAndRole);
         }
       }
     }
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.h b/clang-tools-extra/clangd/index/SymbolCollector.h
index 5ad44150b4d56..bc5095d516db8 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.h
+++ b/clang-tools-extra/clangd/index/SymbolCollector.h
@@ -151,11 +151,12 @@ class SymbolCollector : public index::IndexDataConsumer {
   std::shared_ptr<GlobalCodeCompletionAllocator> CompletionAllocator;
   std::unique_ptr<CodeCompletionTUInfo> CompletionTUInfo;
   Options Opts;
-  using DeclRef = std::pair<SourceLocation, index::SymbolRoleSet>;
+  using SymbolRef = std::pair<SourceLocation, index::SymbolRoleSet>;
   // Symbols referenced from the current TU, flushed on finish().
   llvm::DenseSet<const NamedDecl *> ReferencedDecls;
   llvm::DenseSet<const IdentifierInfo *> ReferencedMacros;
-  llvm::DenseMap<const NamedDecl *, std::vector<DeclRef>> DeclRefs;
+  llvm::DenseMap<const NamedDecl *, std::vector<SymbolRef>> DeclRefs;
+  llvm::DenseMap<SymbolID, std::vector<SymbolRef>> MacroRefs;
   // Maps canonical declaration provided by clang to canonical declaration for
   // an index symbol, if clangd prefers a different declaration than that
   // provided by clang. For example, friend declaration might be considered
diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp
index fb83083384f95..3f3c216c5909c 100644
--- a/clang-tools-extra/clangd/refactor/Rename.cpp
+++ b/clang-tools-extra/clangd/refactor/Rename.cpp
@@ -18,6 +18,9 @@
 #include "clang/AST/DeclTemplate.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Tooling/Refactoring/Rename/USRFindingAction.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
 
 namespace clang {
 namespace clangd {
@@ -55,8 +58,7 @@ llvm::Optional<std::string> getOtherRefFile(const Decl &D, StringRef MainFile,
   // tradeoff. We expect the number of symbol references in the current file
   // is smaller than the limit.
   Req.Limit = 100;
-  if (auto ID = getSymbolID(&D))
-    Req.IDs.insert(*ID);
+  Req.IDs.insert(*getSymbolID(&D));
   llvm::Optional<std::string> OtherFile;
   Index.refs(Req, [&](const Ref &R) {
     if (OtherFile)
@@ -83,7 +85,7 @@ llvm::DenseSet<const Decl *> locateDeclAt(ParsedAST &AST,
   // range of the Decl. This would avoid allowing rename on unrelated tokens.
   //   ^class Foo {} // SelectionTree returns CXXRecordDecl,
   //                 // we don't attempt to trigger rename on this position.
-  // FIXME: make this work on destructors, e.g. "~F^oo()".
+  // FIXME: Make this work on destructors, e.g. "~F^oo()".
   if (const auto *D = SelectedNode->ASTNode.get<Decl>()) {
     if (D->getLocation() != TokenStartLoc)
       return {};
@@ -101,71 +103,101 @@ enum ReasonToReject {
   NoSymbolFound,
   NoIndexProvided,
   NonIndexable,
-  UsedOutsideFile,
+  UsedOutsideFile, // for within-file rename only.
   UnsupportedSymbol,
   AmbiguousSymbol,
 };
 
-// Check the symbol Decl is renameable (per the index) within the file.
-llvm::Optional<ReasonToReject> renamableWithinFile(const Decl &RenameDecl,
-                                                   StringRef MainFile,
-                                                   const SymbolIndex *Index) {
+llvm::Optional<ReasonToReject> renameable(const Decl &RenameDecl,
+                                          StringRef MainFilePath,
+                                          const SymbolIndex *Index,
+                                          bool CrossFile) {
+  // Filter out symbols that are unsupported in both rename modes.
   if (llvm::isa<NamespaceDecl>(&RenameDecl))
     return ReasonToReject::UnsupportedSymbol;
   if (const auto *FD = llvm::dyn_cast<FunctionDecl>(&RenameDecl)) {
     if (FD->isOverloadedOperator())
       return ReasonToReject::UnsupportedSymbol;
   }
+  // Function-local symbols are safe to rename.
+  if (RenameDecl.getParentFunctionOrMethod())
+    return None;
+
+  // Check whether the symbol being renamed is indexable.
   auto &ASTCtx = RenameDecl.getASTContext();
-  const auto &SM = ASTCtx.getSourceManager();
-  bool MainFileIsHeader = isHeaderFile(MainFile, ASTCtx.getLangOpts());
-  bool DeclaredInMainFile = isInsideMainFile(RenameDecl.getBeginLoc(), SM);
+  bool MainFileIsHeader = isHeaderFile(MainFilePath, ASTCtx.getLangOpts());
+  bool DeclaredInMainFile =
+      isInsideMainFile(RenameDecl.getBeginLoc(), ASTCtx.getSourceManager());
+  bool IsMainFileOnly = true;
+  if (MainFileIsHeader)
+    // The main file is a header, so the symbol can't be main-file-only.
+    IsMainFileOnly = false;
+  else if (!DeclaredInMainFile)
+    IsMainFileOnly = false;
+  bool IsIndexable =
+      isa<NamedDecl>(RenameDecl) &&
+      SymbolCollector::shouldCollectSymbol(
+          cast<NamedDecl>(RenameDecl), RenameDecl.getASTContext(),
+          SymbolCollector::Options(), IsMainFileOnly);
+  if (!IsIndexable) // If the symbol is not indexable, we disallow rename.
+    return ReasonToReject::NonIndexable;
 
-  if (!DeclaredInMainFile)
-    // We are sure the symbol is used externally, bail out early.
-    return UsedOutsideFile;
+  if (!CrossFile) {
+    if (!DeclaredInMainFile)
+      // We are sure the symbol is used externally, bail out early.
+      return ReasonToReject::UsedOutsideFile;
 
-  // If the symbol is declared in the main file (which is not a header), we
-  // rename it.
-  if (!MainFileIsHeader)
-    return None;
+    // If the symbol is declared in the main file (which is not a header), we
+    // rename it.
+    if (!MainFileIsHeader)
+      return None;
 
-  // Below are cases where the symbol is declared in the header.
-  // If the symbol is function-local, we rename it.
-  if (RenameDecl.getParentFunctionOrMethod())
-    return None;
+    if (!Index)
+      return ReasonToReject::NoIndexProvided;
+
+    auto OtherFile = getOtherRefFile(RenameDecl, MainFilePath, *Index);
+    // If the symbol is indexable and has no refs from other files in the
+    // index, we rename it.
+    if (!OtherFile)
+      return None;
+    // If the symbol is indexable and has refs from other files in the index,
+    // we disallow rename.
+    return ReasonToReject::UsedOutsideFile;
+  }
 
+  assert(CrossFile);
   if (!Index)
     return ReasonToReject::NoIndexProvided;
 
-  bool IsIndexable = isa<NamedDecl>(RenameDecl) &&
-                     SymbolCollector::shouldCollectSymbol(
-                         cast<NamedDecl>(RenameDecl), ASTCtx, {}, false);
-  // If the symbol is not indexable, we disallow rename.
-  if (!IsIndexable)
-    return ReasonToReject::NonIndexable;
-  auto OtherFile = getOtherRefFile(RenameDecl, MainFile, *Index);
-  // If the symbol is indexable and has no refs from other files in the index,
-  // we rename it.
-  if (!OtherFile)
-    return None;
-  // If the symbol is indexable and has refs from other files in the index,
-  // we disallow rename.
-  return ReasonToReject::UsedOutsideFile;
+  // Blacklist symbols that are not supported yet in cross-file mode due to the
+  // limitations of our index.
+  // FIXME: Renaming templates requires renaming all related specializations;
+  // our index doesn't have this information.
+  if (RenameDecl.getDescribedTemplate())
+    return ReasonToReject::UnsupportedSymbol;
+
+  // FIXME: Renaming virtual methods requires renaming all overrides in
+  // subclasses; our index doesn't have this information.
+  // Note: Within-file rename does support this through the AST.
+  if (const auto *S = llvm::dyn_cast<CXXMethodDecl>(&RenameDecl)) {
+    if (S->isVirtual())
+      return ReasonToReject::UnsupportedSymbol;
+  }
+  return None;
 }
 
 llvm::Error makeError(ReasonToReject Reason) {
   auto Message = [](ReasonToReject Reason) {
     switch (Reason) {
-    case NoSymbolFound:
+    case ReasonToReject::NoSymbolFound:
       return "there is no symbol at the given location";
-    case NoIndexProvided:
-      return "symbol may be used in other files (no index available)";
-    case UsedOutsideFile:
+    case ReasonToReject::NoIndexProvided:
+      return "no index provided";
+    case ReasonToReject::UsedOutsideFile:
       return "the symbol is used outside main file";
-    case NonIndexable:
+    case ReasonToReject::NonIndexable:
       return "symbol may be used in other files (not eligible for indexing)";
-    case UnsupportedSymbol:
+    case ReasonToReject::UnsupportedSymbol:
       return "symbol is not a supported kind (e.g. namespace, macro)";
     case AmbiguousSymbol:
       return "there are multiple symbols at the given location";
@@ -188,7 +220,7 @@ std::vector<SourceLocation> findOccurrencesWithinFile(ParsedAST &AST,
       ND.getDescribedTemplate() ? *ND.getDescribedTemplate() : ND;
   // getUSRsForDeclaration will find other related symbols, e.g. virtual and its
   // overrides, primary template and all explicit specializations.
-  // FIXME: get rid of the remaining tooling APIs.
+  // FIXME: Get rid of the remaining tooling APIs.
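+  // (Illustrative: for `template <class T> class Foo` with an explicit
+  //  specialization `template <> class Foo<int>`, both declarations end up in
+  //  RenameUSRs, so occurrences spelled via the specialization are renamed
+  //  as well.)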
std::vector RenameUSRs = tooling::getUSRsForDeclaration( tooling::getCanonicalSymbolDeclaration(&RenameDecl), AST.getASTContext()); llvm::DenseSet TargetIDs; @@ -212,35 +244,14 @@ std::vector findOccurrencesWithinFile(ParsedAST &AST, return Results; } -} // namespace - +// AST-based rename, it renames all occurrences in the main file. llvm::Expected -renameWithinFile(ParsedAST &AST, llvm::StringRef File, Position Pos, - llvm::StringRef NewName, const SymbolIndex *Index) { +renameWithinFile(ParsedAST &AST, const NamedDecl &RenameDecl, + llvm::StringRef NewName) { const SourceManager &SM = AST.getSourceManager(); - SourceLocation SourceLocationBeg = SM.getMacroArgExpandedLocation( - getBeginningOfIdentifier(Pos, SM, AST.getASTContext().getLangOpts())); - // FIXME: renaming macros is not supported yet, the macro-handling code should - // be moved to rename tooling library. - if (locateMacroAt(SourceLocationBeg, AST.getPreprocessor())) - return makeError(UnsupportedSymbol); - - auto DeclsUnderCursor = locateDeclAt(AST, SourceLocationBeg); - if (DeclsUnderCursor.empty()) - return makeError(NoSymbolFound); - if (DeclsUnderCursor.size() > 1) - return makeError(AmbiguousSymbol); - - const auto *RenameDecl = llvm::dyn_cast(*DeclsUnderCursor.begin()); - if (!RenameDecl) - return makeError(UnsupportedSymbol); - - if (auto Reject = - renamableWithinFile(*RenameDecl->getCanonicalDecl(), File, Index)) - return makeError(*Reject); tooling::Replacements FilteredChanges; - for (SourceLocation Loc : findOccurrencesWithinFile(AST, *RenameDecl)) { + for (SourceLocation Loc : findOccurrencesWithinFile(AST, RenameDecl)) { SourceLocation RenameLoc = Loc; // We don't rename in any macro bodies, but we allow rename the symbol // spelled in a top-level macro argument in the main file. @@ -265,5 +276,233 @@ renameWithinFile(ParsedAST &AST, llvm::StringRef File, Position Pos, return FilteredChanges; } +Range toRange(const SymbolLocation &L) { + Range R; + R.start.line = L.Start.line(); + R.start.character = L.Start.column(); + R.end.line = L.End.line(); + R.end.character = L.End.column(); + return R; +} + +// Return all rename occurrences (using the index) outside of the main file, +// grouped by the absolute file path. +llvm::Expected>> +findOccurrencesOutsideFile(const NamedDecl &RenameDecl, + llvm::StringRef MainFile, const SymbolIndex &Index) { + RefsRequest RQuest; + RQuest.IDs.insert(*getSymbolID(&RenameDecl)); + + // Absolute file path => rename occurrences in that file. + llvm::StringMap> AffectedFiles; + // FIXME: Make the limit customizable. + static constexpr size_t MaxLimitFiles = 50; + bool HasMore = Index.refs(RQuest, [&](const Ref &R) { + if (AffectedFiles.size() > MaxLimitFiles) + return; + if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) { + if (*RefFilePath != MainFile) + AffectedFiles[*RefFilePath].push_back(toRange(R.Location)); + } + }); + + if (AffectedFiles.size() > MaxLimitFiles) + return llvm::make_error( + llvm::formatv("The number of affected files exceeds the max limit {0}", + MaxLimitFiles), + llvm::inconvertibleErrorCode()); + if (HasMore) { + return llvm::make_error( + llvm::formatv("The symbol {0} has too many occurrences", + RenameDecl.getQualifiedNameAsString()), + llvm::inconvertibleErrorCode()); + } + + return AffectedFiles; +} + +// Index-based rename, it renames all occurrences outside of the main file. +// +// The cross-file rename is purely based on the index, as we don't want to +// build all ASTs for affected files, which may cause a performance hit. 
+// We choose to trade off some correctness for performance and scalability.
+//
+// Clangd builds a dynamic index for all opened files on top of the static
+// index of the whole codebase. The dynamic index is up-to-date (respects
+// dirty buffers) once clangd has finished processing opened files, while the
+// static (background) index is relatively stale. We choose the dirty buffers
+// as the file content we rename on, and fall back to the file content on disk
+// if there is no dirty buffer.
+//
+// FIXME: Add range patching heuristics to detect staleness of the index, and
+// report to users.
+// FIXME: Our index may return implicit references, which are not eligible for
+// rename; we should filter out these references.
+llvm::Expected<FileEdits> renameOutsideFile(
+    const NamedDecl &RenameDecl, llvm::StringRef MainFilePath,
+    llvm::StringRef NewName, const SymbolIndex &Index,
+    llvm::function_ref<llvm::Expected<std::string>(PathRef)> GetFileContent) {
+  auto AffectedFiles =
+      findOccurrencesOutsideFile(RenameDecl, MainFilePath, Index);
+  if (!AffectedFiles)
+    return AffectedFiles.takeError();
+  FileEdits Results;
+  for (auto &FileAndOccurrences : *AffectedFiles) {
+    llvm::StringRef FilePath = FileAndOccurrences.first();
+
+    auto AffectedFileCode = GetFileContent(FilePath);
+    if (!AffectedFileCode) {
+      elog("Failed to read file content: {0}", AffectedFileCode.takeError());
+      continue;
+    }
+    auto RenameEdit =
+        buildRenameEdit(FilePath, *AffectedFileCode,
+                        std::move(FileAndOccurrences.second), NewName);
+    if (!RenameEdit) {
+      return llvm::make_error<llvm::StringError>(
+          llvm::formatv("failed to build rename edit for file {0}: {1}",
+                        FilePath, llvm::toString(RenameEdit.takeError())),
+          llvm::inconvertibleErrorCode());
+    }
+    if (!RenameEdit->Replacements.empty())
+      Results.insert({FilePath, std::move(*RenameEdit)});
+  }
+  return Results;
+}
+
 } // namespace
 
+llvm::Expected<FileEdits> rename(const RenameInputs &RInputs) {
+  ParsedAST &AST = RInputs.AST;
+  const SourceManager &SM = AST.getSourceManager();
+  llvm::StringRef MainFileCode = SM.getBufferData(SM.getMainFileID());
+  auto GetFileContent = [&RInputs,
+                         &SM](PathRef AbsPath) -> llvm::Expected<std::string> {
+    llvm::Optional<std::string> DirtyBuffer;
+    if (RInputs.GetDirtyBuffer &&
+        (DirtyBuffer = RInputs.GetDirtyBuffer(AbsPath)))
+      return std::move(*DirtyBuffer);
+
+    auto Content =
+        SM.getFileManager().getVirtualFileSystem().getBufferForFile(AbsPath);
+    if (!Content)
+      return llvm::createStringError(
+          llvm::inconvertibleErrorCode(),
+          llvm::formatv("Failed to open file {0}: {1}", AbsPath,
+                        Content.getError().message()));
+    if (!*Content)
+      return llvm::createStringError(
+          llvm::inconvertibleErrorCode(),
+          llvm::formatv("Got no buffer for file {0}", AbsPath));
+
+    return (*Content)->getBuffer().str();
+  };
+  SourceLocation SourceLocationBeg = SM.getMacroArgExpandedLocation(
+      getBeginningOfIdentifier(RInputs.Pos, SM, AST.getLangOpts()));
+  // FIXME: Renaming macros is not supported yet; the macro-handling code
+  // should be moved to the rename tooling library.
+  if (locateMacroAt(SourceLocationBeg, AST.getPreprocessor()))
+    return makeError(ReasonToReject::UnsupportedSymbol);
+
+  auto DeclsUnderCursor = locateDeclAt(AST, SourceLocationBeg);
+  if (DeclsUnderCursor.empty())
+    return makeError(ReasonToReject::NoSymbolFound);
+  if (DeclsUnderCursor.size() > 1)
+    return makeError(ReasonToReject::AmbiguousSymbol);
+
+  const auto *RenameDecl = llvm::dyn_cast<NamedDecl>(*DeclsUnderCursor.begin());
+  if (!RenameDecl)
+    return makeError(ReasonToReject::UnsupportedSymbol);
+
+  auto Reject =
+      renameable(*RenameDecl->getCanonicalDecl(), RInputs.MainFilePath,
+                 RInputs.Index, RInputs.AllowCrossFile);
+  if (Reject)
+    return makeError(*Reject);
+
+  // We have two implementations of the rename:
+  //   - AST-based rename: used for renaming local symbols, e.g. variables
+  //     defined in a function body;
+  //   - index-based rename: used for renaming non-local symbols, and not
+  //     feasible for local symbols (by design, our index doesn't index these
+  //     symbols).
+  // To make cross-file rename work for local symbols, we use a hybrid
+  // solution:
+  //   - run AST-based rename on the main file;
+  //   - run index-based rename on other affected files;
+  auto MainFileRenameEdit = renameWithinFile(AST, *RenameDecl, RInputs.NewName);
+  if (!MainFileRenameEdit)
+    return MainFileRenameEdit.takeError();
+
+  if (!RInputs.AllowCrossFile) {
+    // Within-file rename: just return the main file results.
+    return FileEdits(
+        {std::make_pair(RInputs.MainFilePath,
+                        Edit{MainFileCode, std::move(*MainFileRenameEdit)})});
+  }
+
+  FileEdits Results;
+  // renameable() above guarantees that we are renaming a local symbol if we
+  // don't have an index at this point.
+  if (RInputs.Index) {
+    auto OtherFilesEdits =
+        renameOutsideFile(*RenameDecl, RInputs.MainFilePath, RInputs.NewName,
+                          *RInputs.Index, GetFileContent);
+    if (!OtherFilesEdits)
+      return OtherFilesEdits.takeError();
+    Results = std::move(*OtherFilesEdits);
+  }
+  // Attach the rename edits for the main file.
+  Results.try_emplace(RInputs.MainFilePath, MainFileCode,
+                      std::move(*MainFileRenameEdit));
+  return Results;
+}
+
+llvm::Expected<Edit> buildRenameEdit(llvm::StringRef AbsFilePath,
+                                     llvm::StringRef InitialCode,
+                                     std::vector<Range> Occurrences,
+                                     llvm::StringRef NewName) {
+  llvm::sort(Occurrences);
+  // These two always correspond to the same position.
+  Position LastPos{0, 0};
+  size_t LastOffset = 0;
+
+  auto Offset = [&](const Position &P) -> llvm::Expected<size_t> {
+    assert(LastPos <= P && "malformed input");
+    Position Shifted = {
+        P.line - LastPos.line,
+        P.line > LastPos.line ?
            P.character : P.character - LastPos.character};
+    auto ShiftedOffset =
+        positionToOffset(InitialCode.substr(LastOffset), Shifted);
+    if (!ShiftedOffset)
+      return llvm::make_error<llvm::StringError>(
+          llvm::formatv("failed to convert the position {0} to offset ({1})",
+                        P, llvm::toString(ShiftedOffset.takeError())),
+          llvm::inconvertibleErrorCode());
+    LastPos = P;
+    LastOffset += *ShiftedOffset;
+    return LastOffset;
+  };
+
+  std::vector<std::pair<size_t, size_t>> OccurrencesOffsets;
+  for (const auto &R : Occurrences) {
+    auto StartOffset = Offset(R.start);
+    if (!StartOffset)
+      return StartOffset.takeError();
+    auto EndOffset = Offset(R.end);
+    if (!EndOffset)
+      return EndOffset.takeError();
+    OccurrencesOffsets.push_back({*StartOffset, *EndOffset});
+  }
+
+  tooling::Replacements RenameEdit;
+  for (const auto &R : OccurrencesOffsets) {
+    auto ByteLength = R.second - R.first;
+    if (auto Err = RenameEdit.add(
+            tooling::Replacement(AbsFilePath, R.first, ByteLength, NewName)))
+      return std::move(Err);
+  }
+  return Edit(InitialCode, std::move(RenameEdit));
+}
+
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/refactor/Rename.h b/clang-tools-extra/clangd/refactor/Rename.h
index 63a1ffe321508..6f38c14a3e2a8 100644
--- a/clang-tools-extra/clangd/refactor/Rename.h
+++ b/clang-tools-extra/clangd/refactor/Rename.h
@@ -9,7 +9,9 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_REFACTOR_RENAME_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_REFACTOR_RENAME_H
 
+#include "Path.h"
 #include "Protocol.h"
+#include "SourceCode.h"
 #include "clang/Tooling/Core/Replacement.h"
 #include "llvm/Support/Error.h"
 
@@ -18,13 +20,40 @@ namespace clangd {
 class ParsedAST;
 class SymbolIndex;
 
-/// Renames all occurrences of the symbol at \p Pos to \p NewName.
-/// Occurrences outside the current file are not modified.
-/// Returns an error if renaming a symbol that's used in another file (per the
-/// index).
-llvm::Expected<tooling::Replacements>
-renameWithinFile(ParsedAST &AST, llvm::StringRef File, Position Pos,
-                 llvm::StringRef NewName, const SymbolIndex *Index = nullptr);
+/// Gets the dirty buffer for a given file \p AbsPath.
+/// Returns None if there is no dirty buffer for the given file.
+using DirtyBufferGetter =
+    llvm::function_ref<llvm::Optional<std::string>(PathRef AbsPath)>;
+
+struct RenameInputs {
+  Position Pos; // the position triggering the rename
+  llvm::StringRef NewName;
+
+  ParsedAST &AST;
+  llvm::StringRef MainFilePath;
+
+  const SymbolIndex *Index = nullptr;
+
+  bool AllowCrossFile = false;
+  // When set, used by the rename to get file content for all rename-related
+  // files.
+  // If there is no corresponding dirty buffer, we will use the file content
+  // from disk.
+  DirtyBufferGetter GetDirtyBuffer = nullptr;
+};
+
+/// Renames all occurrences of the symbol.
+/// If AllowCrossFile is false, returns an error when renaming a symbol that's
+/// used in another file (per the index).
+llvm::Expected<FileEdits> rename(const RenameInputs &RInputs);
+
+/// Generates rename edits that replace all given occurrences with NewName.
+/// Exposed for testing only.
+llvm::Expected<Edit> buildRenameEdit(llvm::StringRef AbsFilePath,
+                                     llvm::StringRef InitialCode,
+                                     std::vector<Range> Occurrences,
+                                     llvm::StringRef NewName);
 
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/refactor/Tweak.h b/clang-tools-extra/clangd/refactor/Tweak.h
index de655abd98c7b..69ac4ad612e9d 100644
--- a/clang-tools-extra/clangd/refactor/Tweak.h
+++ b/clang-tools-extra/clangd/refactor/Tweak.h
@@ -77,9 +77,7 @@ class Tweak {
   struct Effect {
     /// A message to be displayed to the user.
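+    /// (A client would typically surface this via something like LSP's
+    /// window/showMessage; how it is displayed is up to the embedder.)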
    llvm::Optional<std::string> ShowMessage;
-    /// A mapping from file path (the one used for accessing the underlying
-    /// VFS) to edits.
-    llvm::StringMap<Edit> ApplyEdits;
+    FileEdits ApplyEdits;
 
     static Effect showMessage(StringRef S) {
       Effect E;
diff --git a/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt b/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt
index ddf10a2ca2bac..6f6ef4a2ace23 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt
+++ b/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt
@@ -15,10 +15,12 @@ add_clang_library(clangDaemonTweaks OBJECT
   AnnotateHighlightings.cpp
   DumpAST.cpp
   DefineInline.cpp
+  DefineOutline.cpp
   ExpandAutoType.cpp
   ExpandMacro.cpp
   ExtractFunction.cpp
   ExtractVariable.cpp
+  ObjCLocalizeStringLiteral.cpp
   RawStringLiteral.cpp
   RemoveUsingNamespace.cpp
   SwapIfBranches.cpp
diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp
new file mode 100644
index 0000000000000..f6bed9727cf10
--- /dev/null
+++ b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp
@@ -0,0 +1,330 @@
+//===--- DefineOutline.cpp --------------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AST.h"
+#include "FindTarget.h"
+#include "HeaderSourceSwitch.h"
+#include "Logger.h"
+#include "Path.h"
+#include "Selection.h"
+#include "SourceCode.h"
+#include "refactor/Tweak.h"
+#include "clang/AST/ASTTypeTraits.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/Stmt.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Driver/Types.h"
+#include "clang/Format/Format.h"
+#include "clang/Tooling/Core/Replacement.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include <cstddef>
+#include <string>
+
+namespace clang {
+namespace clangd {
+namespace {
+
+// Deduces the FunctionDecl from a selection. Requires either the function body
+// or the function decl to be selected. Returns null if none of the above
+// criteria is met.
+// FIXME: This is shared with define inline; move them to a common header once
+// we have a place for such.
+const FunctionDecl *getSelectedFunction(const SelectionTree::Node *SelNode) {
+  if (!SelNode)
+    return nullptr;
+  const ast_type_traits::DynTypedNode &AstNode = SelNode->ASTNode;
+  if (const FunctionDecl *FD = AstNode.get<FunctionDecl>())
+    return FD;
+  if (AstNode.get<CompoundStmt>() &&
+      SelNode->Selected == SelectionTree::Complete) {
+    if (const SelectionTree::Node *P = SelNode->Parent)
+      return P->ASTNode.get<FunctionDecl>();
+  }
+  return nullptr;
+}
+
+llvm::Optional<Path> getSourceFile(llvm::StringRef FileName,
+                                   const Tweak::Selection &Sel) {
+  if (auto Source = getCorrespondingHeaderOrSource(
+          FileName,
+          &Sel.AST.getSourceManager().getFileManager().getVirtualFileSystem()))
+    return *Source;
+  return getCorrespondingHeaderOrSource(FileName, Sel.AST, Sel.Index);
+}
+
+// Synthesize a DeclContext for TargetNS from CurContext. TargetNS must be
+// empty for the global namespace, and end with "::" otherwise.
+// Returns None if TargetNS is not a prefix of CurContext.
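+// For example (illustrative): with CurContext inside namespace a::b::c,
+// TargetNS == "a::b::" yields the DeclContext of a::b, while TargetNS ==
+// "x::" is not a prefix of "a::b::c::" and yields None.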
+llvm::Optional<const DeclContext *>
+findContextForNS(llvm::StringRef TargetNS, const DeclContext *CurContext) {
+  assert(TargetNS.empty() || TargetNS.endswith("::"));
+  // Skip any non-namespace contexts, e.g. TagDecls, functions/methods.
+  CurContext = CurContext->getEnclosingNamespaceContext();
+  // If TargetNS is empty, it means the global namespace, which is the
+  // translation unit.
+  if (TargetNS.empty()) {
+    while (!CurContext->isTranslationUnit())
+      CurContext = CurContext->getParent();
+    return CurContext;
+  }
+  // Otherwise we need to drop any trailing namespaces from CurContext until
+  // we reach TargetNS.
+  std::string TargetContextNS =
+      CurContext->isNamespace()
+          ? llvm::cast<NamespaceDecl>(CurContext)->getQualifiedNameAsString()
+          : "";
+  TargetContextNS.append("::");
+
+  llvm::StringRef CurrentContextNS(TargetContextNS);
+  // If TargetNS is not a prefix of CurrentContext, there's no way to reach
+  // it.
+  if (!CurrentContextNS.startswith(TargetNS))
+    return llvm::None;
+
+  while (CurrentContextNS != TargetNS) {
+    CurContext = CurContext->getParent();
+    // These colons always exist since TargetNS is a prefix of
+    // CurrentContextNS, it ends with "::" and they are not equal.
+    CurrentContextNS = CurrentContextNS.take_front(
+        CurrentContextNS.drop_back(2).rfind("::") + 2);
+  }
+  return CurContext;
+}
+
+// Returns source code for FD after applying Replacements.
+// FIXME: Make the function take a parameter to return only the function body,
+// afterwards it can be shared with define-inline code action.
+llvm::Expected<std::string>
+getFunctionSourceAfterReplacements(const FunctionDecl *FD,
+                                   const tooling::Replacements &Replacements) {
+  const auto &SM = FD->getASTContext().getSourceManager();
+  auto OrigFuncRange = toHalfOpenFileRange(
+      SM, FD->getASTContext().getLangOpts(), FD->getSourceRange());
+  if (!OrigFuncRange)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "Couldn't get range for function.");
+  // Include template parameter list.
+  if (auto *FTD = FD->getDescribedFunctionTemplate())
+    OrigFuncRange->setBegin(FTD->getBeginLoc());
+
+  // Get new begin and end positions for the qualified function definition.
+  unsigned FuncBegin = SM.getFileOffset(OrigFuncRange->getBegin());
+  unsigned FuncEnd = Replacements.getShiftedCodePosition(
+      SM.getFileOffset(OrigFuncRange->getEnd()));
+
+  // Trim the result to the function definition.
+  auto QualifiedFunc = tooling::applyAllReplacements(
+      SM.getBufferData(SM.getMainFileID()), Replacements);
+  if (!QualifiedFunc)
+    return QualifiedFunc.takeError();
+  return QualifiedFunc->substr(FuncBegin, FuncEnd - FuncBegin + 1);
+}
+
+// Creates a modified version of the function definition that can be inserted
+// at a different location; qualifies the return value and function name to
+// achieve that. Contains the function signature, body, and template
+// parameters if applicable. No need to qualify parameters, as they are looked
+// up in the context containing the function/method.
+llvm::Expected<std::string>
+getFunctionSourceCode(const FunctionDecl *FD, llvm::StringRef TargetNamespace) {
+  auto &SM = FD->getASTContext().getSourceManager();
+  auto TargetContext = findContextForNS(TargetNamespace, FD->getDeclContext());
+  if (!TargetContext)
+    return llvm::createStringError(
+        llvm::inconvertibleErrorCode(),
+        "define outline: couldn't find a context for target");
+
+  llvm::Error Errors = llvm::Error::success();
+  tooling::Replacements QualifierInsertions;
+
+  // Finds the first unqualified name in function return type and name, then
+  // qualifies those to be valid in TargetContext.
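+  // For instance (illustrative, mirroring the QualifyReturnValue and
+  // QualifyFunctionName tests in TweakTests.cpp): a definition
+  // `void foo() {}` inside namespace a::b, moved to the global namespace,
+  // is spelled `void a::b::foo() {}`, and a return type `Foo` declared in
+  // namespace a is spelled `a::Foo`. Parameters stay untouched.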
+  findExplicitReferences(FD, [&](ReferenceLoc Ref) {
+    // It is enough to qualify the first qualifier, so skip references with a
+    // qualifier. Also we can't do much if there are no targets or the name is
+    // inside a macro body.
+    if (Ref.Qualifier || Ref.Targets.empty() || Ref.NameLoc.isMacroID())
+      return;
+    // Only qualify return type and function name.
+    if (Ref.NameLoc != FD->getReturnTypeSourceRange().getBegin() &&
+        Ref.NameLoc != FD->getLocation())
+      return;
+
+    for (const NamedDecl *ND : Ref.Targets) {
+      if (ND->getDeclContext() != Ref.Targets.front()->getDeclContext()) {
+        elog("Targets from multiple contexts: {0}, {1}",
+             printQualifiedName(*Ref.Targets.front()), printQualifiedName(*ND));
+        return;
+      }
+    }
+    const NamedDecl *ND = Ref.Targets.front();
+    const std::string Qualifier =
+        getQualification(FD->getASTContext(), *TargetContext,
+                         SM.getLocForStartOfFile(SM.getMainFileID()), ND);
+    if (auto Err = QualifierInsertions.add(
+            tooling::Replacement(SM, Ref.NameLoc, 0, Qualifier)))
+      Errors = llvm::joinErrors(std::move(Errors), std::move(Err));
+  });
+
+  if (Errors)
+    return std::move(Errors);
+  return getFunctionSourceAfterReplacements(FD, QualifierInsertions);
+}
+
+struct InsertionPoint {
+  std::string EnclosingNamespace;
+  size_t Offset;
+};
+// Returns the most natural insertion point for \p QualifiedName in
+// \p Contents. This currently only considers namespace proximity, but in the
+// future it should also try to follow the ordering of declarations. For
+// example, if decls come in order `foo, bar, baz` then this function should
+// return some point between foo and baz for inserting bar.
+llvm::Expected<InsertionPoint>
+getInsertionPoint(llvm::StringRef Contents, llvm::StringRef QualifiedName,
+                  const format::FormatStyle &Style) {
+  auto Region = getEligiblePoints(Contents, QualifiedName, Style);
+
+  assert(!Region.EligiblePoints.empty());
+  // FIXME: This selection can be made smarter by looking at the definition
+  // locations for adjacent decls to Source. Unfortunately pseudo parsing in
+  // getEligiblePoints only knows about namespace begin/end events so we
+  // can't match function start/end positions yet.
+  auto Offset = positionToOffset(Contents, Region.EligiblePoints.back());
+  if (!Offset)
+    return Offset.takeError();
+  return InsertionPoint{Region.EnclosingNamespace, *Offset};
+}
+
+/// Moves definition of a function/method to an appropriate implementation
+/// file.
+///
+/// Before:
+///   a.h
+///     void foo() { return; }
+///   a.cc
+///     #include "a.h"
+///
+/// ----------------
+///
+/// After:
+///   a.h
+///     void foo();
+///   a.cc
+///     #include "a.h"
+///     void foo() { return; }
class DefineOutline : public Tweak {
+public:
+  const char *id() const override;
+
+  bool hidden() const override { return true; }
+  Intent intent() const override { return Intent::Refactor; }
+  std::string title() const override {
+    return "Move function body to out-of-line.";
+  }
+
+  bool prepare(const Selection &Sel) override {
+    // Bail out if we are not in a header file.
+    // FIXME: We might want to consider moving method definitions below class
+    // definition even if we are inside a source file.
+    if (!isHeaderFile(Sel.AST.getSourceManager().getFilename(Sel.Cursor),
+                      Sel.AST.getLangOpts()))
+      return false;
+
+    Source = getSelectedFunction(Sel.ASTSelection.commonAncestor());
+    // Bail out if the selection is not an inline function definition.
+    if (!Source || !Source->doesThisDeclarationHaveABody() ||
+        Source->isOutOfLine())
+      return false;
+
+    // Bail out in templated classes, as it is hard to spell the class name,
+    // e.g. if the template parameter is unnamed.
+    if (auto *MD = llvm::dyn_cast<CXXMethodDecl>(Source)) {
+      if (MD->getParent()->isTemplated())
+        return false;
+    }
+
+    // Note that we don't check whether an implementation file exists or not
+    // in prepare, since performing disk IO on each prepare request might be
+    // expensive.
+    return true;
+  }
+
+  Expected<Effect> apply(const Selection &Sel) override {
+    const SourceManager &SM = Sel.AST.getSourceManager();
+    auto MainFileName =
+        getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM);
+    if (!MainFileName)
+      return llvm::createStringError(
+          llvm::inconvertibleErrorCode(),
+          "Couldn't get absolute path for main file.");
+
+    auto CCFile = getSourceFile(*MainFileName, Sel);
+    if (!CCFile)
+      return llvm::createStringError(
+          llvm::inconvertibleErrorCode(),
+          "Couldn't find a suitable implementation file.");
+
+    auto &FS =
+        Sel.AST.getSourceManager().getFileManager().getVirtualFileSystem();
+    auto Buffer = FS.getBufferForFile(*CCFile);
+    // FIXME: Maybe we should consider creating the implementation file if it
+    // doesn't exist?
+    if (!Buffer)
+      return llvm::createStringError(Buffer.getError(),
+                                     Buffer.getError().message());
+    auto Contents = Buffer->get()->getBuffer();
+    auto InsertionPoint =
+        getInsertionPoint(Contents, Source->getQualifiedNameAsString(),
+                          getFormatStyleForFile(*CCFile, Contents, &FS));
+    if (!InsertionPoint)
+      return InsertionPoint.takeError();
+
+    auto FuncDef =
+        getFunctionSourceCode(Source, InsertionPoint->EnclosingNamespace);
+    if (!FuncDef)
+      return FuncDef.takeError();
+
+    SourceManagerForFile SMFF(*CCFile, Contents);
+    const tooling::Replacement InsertFunctionDef(
+        *CCFile, InsertionPoint->Offset, 0, *FuncDef);
+    auto Effect = Effect::mainFileEdit(
+        SMFF.get(), tooling::Replacements(InsertFunctionDef));
+    if (!Effect)
+      return Effect.takeError();
+
+    // FIXME: We should also get rid of the inline qualifier.
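+    // Illustratively (per the ApplyTest expectations in TweakTests.cpp), the
+    // header-side edit built next turns `void foo() { return; }` into
+    // `void foo() ;`, while the main-file edit built above inserts the full
+    // definition into the implementation file.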
+    const tooling::Replacement DeleteFuncBody(
+        Sel.AST.getSourceManager(),
+        CharSourceRange::getTokenRange(*toHalfOpenFileRange(
+            SM, Sel.AST.getLangOpts(), Source->getBody()->getSourceRange())),
+        ";");
+    auto HeaderFE = Effect::fileEdit(SM, SM.getMainFileID(),
+                                     tooling::Replacements(DeleteFuncBody));
+    if (!HeaderFE)
+      return HeaderFE.takeError();
+
+    Effect->ApplyEdits.try_emplace(HeaderFE->first,
+                                   std::move(HeaderFE->second));
+    return std::move(*Effect);
+  }
+
+private:
+  const FunctionDecl *Source = nullptr;
+};
+
+REGISTER_TWEAK(DefineOutline);
+
+} // namespace
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp
index 1551f41a13184..ce9addb293bf9 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp
@@ -645,7 +645,7 @@ tooling::Replacement createFunctionDefinition(const NewFunction &ExtractedFunc,
 bool ExtractFunction::prepare(const Selection &Inputs) {
   const Node *CommonAnc = Inputs.ASTSelection.commonAncestor();
   const SourceManager &SM = Inputs.AST.getSourceManager();
-  const LangOptions &LangOpts = Inputs.AST.getASTContext().getLangOpts();
+  const LangOptions &LangOpts = Inputs.AST.getLangOpts();
   if (auto MaybeExtZone = findExtractionZone(CommonAnc, SM, LangOpts)) {
     ExtZone = std::move(*MaybeExtZone);
     return true;
@@ -655,7 +655,7 @@ bool ExtractFunction::prepare(const Selection &Inputs) {
 
 Expected<Tweak::Effect> ExtractFunction::apply(const Selection &Inputs) {
   const SourceManager &SM = Inputs.AST.getSourceManager();
-  const LangOptions &LangOpts = Inputs.AST.getASTContext().getLangOpts();
+  const LangOptions &LangOpts = Inputs.AST.getLangOpts();
   auto ExtractedFunc = getExtractedFunction(ExtZone, SM, LangOpts);
   // FIXME: Add more types of errors.
   if (!ExtractedFunc)
diff --git a/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp b/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp
new file mode 100644
index 0000000000000..62d0c6a2d20c6
--- /dev/null
+++ b/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp
@@ -0,0 +1,85 @@
+//===--- ObjCLocalizeStringLiteral.cpp ---------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Logger.h"
+#include "ParsedAST.h"
+#include "SourceCode.h"
+#include "refactor/Tweak.h"
+#include "clang/AST/ExprObjC.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Tooling/Core/Replacement.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+/// Wraps an Objective-C string literal with the NSLocalizedString macro.
+/// Before: +/// @"description" +/// ^^^ +/// After: +/// NSLocalizedString(@"description", @"") +class ObjCLocalizeStringLiteral : public Tweak { +public: + const char *id() const override final; + Intent intent() const override { return Intent::Refactor; } + + bool prepare(const Selection &Inputs) override; + Expected apply(const Selection &Inputs) override; + std::string title() const override; + +private: + const clang::ObjCStringLiteral *Str = nullptr; +}; + +REGISTER_TWEAK(ObjCLocalizeStringLiteral) + +bool ObjCLocalizeStringLiteral::prepare(const Selection &Inputs) { + const SelectionTree::Node *N = Inputs.ASTSelection.commonAncestor(); + if (!N) + return false; + // Allow the refactoring even if the user selected only the C string part + // of the expression. + if (N->ASTNode.get()) { + if (N->Parent) + N = N->Parent; + } + Str = dyn_cast_or_null(N->ASTNode.get()); + return Str; +} + +Expected +ObjCLocalizeStringLiteral::apply(const Selection &Inputs) { + auto &SM = Inputs.AST.getSourceManager(); + auto &LangOpts = Inputs.AST.getASTContext().getLangOpts(); + auto Reps = tooling::Replacements(tooling::Replacement( + SM, CharSourceRange::getCharRange(Str->getBeginLoc()), + "NSLocalizedString(", LangOpts)); + SourceLocation EndLoc = Lexer::getLocForEndOfToken( + Str->getEndLoc(), 0, Inputs.AST.getSourceManager(), LangOpts); + if (auto Err = Reps.add(tooling::Replacement( + SM, CharSourceRange::getCharRange(EndLoc), ", @\"\")", LangOpts))) + return std::move(Err); + return Effect::mainFileEdit(SM, std::move(Reps)); +} + +std::string ObjCLocalizeStringLiteral::title() const { + return "Wrap in NSLocalizedString"; +} + +} // namespace +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp b/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp index 42d0122b33824..2d4bf755f64f5 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp @@ -91,7 +91,7 @@ Expected RawStringLiteral::apply(const Selection &Inputs) { auto &SM = Inputs.AST.getSourceManager(); auto Reps = tooling::Replacements( tooling::Replacement(SM, Str, ("R\"(" + Str->getBytes() + ")\"").str(), - Inputs.AST.getASTContext().getLangOpts())); + Inputs.AST.getLangOpts())); return Effect::mainFileEdit(SM, std::move(Reps)); } diff --git a/clang-tools-extra/clangd/test/exit-eof.test b/clang-tools-extra/clangd/test/exit-eof.test new file mode 100644 index 0000000000000..06d2ea87ff480 --- /dev/null +++ b/clang-tools-extra/clangd/test/exit-eof.test @@ -0,0 +1,7 @@ +# RUN: not clangd -sync < %s 2> %t.err +# RUN: FileCheck %s < %t.err +# +# No LSP messages here, just let clangd see the end-of-file +# CHECK: Transport error: +# (Typically "Transport error: Input/output error" but platform-dependent). 
+ diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 2639df31dbe8d..b8385a0c9e5d5 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -11,6 +11,7 @@ #include "Features.inc" #include "Path.h" #include "Protocol.h" +#include "Shutdown.h" #include "Trace.h" #include "Transport.h" #include "index/Background.h" @@ -35,6 +36,10 @@ #include #include +#ifndef _WIN32 +#include +#endif + namespace clang { namespace clangd { namespace { @@ -264,6 +269,16 @@ list TweakList{ CommaSeparated, }; +opt CrossFileRename{ + "cross-file-rename", + cat(Features), + desc("Enable cross-file rename feature. Note that this feature is " + "experimental and may lead to broken code or incomplete rename " + "results"), + init(false), + Hidden, +}; + opt WorkerThreadsCount{ "j", cat(Misc), @@ -435,6 +450,7 @@ int main(int argc, char *argv[]) { llvm::InitializeAllTargetInfos(); llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + llvm::sys::SetInterruptFunction(&requestShutdown); llvm::cl::SetVersionPrinter([](llvm::raw_ostream &OS) { OS << clang::getClangToolFullVersion("clangd") << "\n"; }); @@ -531,6 +547,10 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var LoggingSession LoggingSession(Logger); // Write some initial logs before we start doing any real work. log("{0}", clang::getClangToolFullVersion("clangd")); +// FIXME: abstract this better, and print PID on windows too. +#ifndef _WIN32 + log("PID: {0}", getpid()); +#endif { SmallString<128> CWD; if (auto Err = llvm::sys::fs::current_path(CWD)) @@ -595,6 +615,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var } Opts.StaticIndex = StaticIdx.get(); Opts.AsyncThreadsCount = WorkerThreadsCount; + Opts.CrossFileRename = CrossFileRename; clangd::CodeCompleteOptions CCOpts; CCOpts.IncludeIneligibleResults = IncludeIneligibleResults; @@ -683,12 +704,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var // However if a bug causes them to run forever, we want to ensure the process // eventually exits. As clangd isn't directly user-facing, an editor can // "leak" clangd processes. Crashing in this case contains the damage. - // - // This is more portable than sys::WatchDog, and yields a stack trace. 
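The removed inline watchdog continuing below is folded into an `abortAfterTimeout` helper from the newly added Shutdown.h include. That helper's implementation is not part of this diff; a minimal sketch of what it plausibly wraps, mirroring the removed code (name taken from the call site, exact signature assumed), is:

    #include <chrono>
    #include <cstdlib>
    #include <thread>

    // Plausible shape only; the real helper lives in clangd's Shutdown files.
    // The thread is detached so it never blocks a normal, timely exit; if the
    // process is still alive when the timer fires, crash to contain the damage.
    void abortAfterTimeout(std::chrono::seconds Timeout) {
      std::thread([Timeout] {
        std::this_thread::sleep_for(Timeout);
        std::abort();
      }).detach();
    }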
- std::thread([] { - std::this_thread::sleep_for(std::chrono::minutes(5)); - std::abort(); - }).detach(); + abortAfterTimeout(std::chrono::minutes(5)); return ExitCode; } diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index cb6d611503199..28f18e73d7a85 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -1874,7 +1874,10 @@ TEST(CompletionTest, CompletionTokenRange) { Annotations TestCode(Text); auto Results = completions(Server, TestCode.code(), TestCode.point()); - EXPECT_EQ(Results.Completions.size(), 1u); + if (Results.Completions.size() != 1) { + ADD_FAILURE() << "Results.Completions.size() != 1"; + continue; + } EXPECT_THAT(Results.Completions.front().CompletionTokenRange, TestCode.range()); } diff --git a/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp b/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp index d4438e0a9a0b0..8eee7550bf8e4 100644 --- a/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp +++ b/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp @@ -88,7 +88,7 @@ TEST(CollectMainFileMacros, SelectedMacros) { break; auto Loc = getBeginningOfIdentifier(ExpectedRefs.begin()->start, SM, - AST.getASTContext().getLangOpts()); + AST.getLangOpts()); auto Macro = locateMacroAt(Loc, PP); assert(Macro); auto SID = getSymbolID(Macro->Name, Macro->Info, SM); diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index fe7a8898c5de4..3c0257849021d 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -709,7 +709,10 @@ void bar(X *x) { auto Parsed = TU.build(); for (const auto &D : Parsed.getDiagnostics()) { - EXPECT_EQ(D.Fixes.size(), 1u); + if (D.Fixes.size() != 1) { + ADD_FAILURE() << "D.Fixes.size() != 1"; + continue; + } EXPECT_EQ(D.Fixes[0].Message, std::string("Add include \"a.h\" for symbol X")); } diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp index f6e5fe723ec71..620eb3d6d3d69 100644 --- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp @@ -114,6 +114,23 @@ TEST_F(TargetDeclTest, Exprs) { auto X = S() [[+]] S(); )cpp"; EXPECT_DECLS("DeclRefExpr", "S operator+(S) const"); + + Code = R"cpp( + int foo(); + int s = foo[[()]]; + )cpp"; + EXPECT_DECLS("CallExpr", "int foo()"); + + Code = R"cpp( + struct X { + void operator()(int n); + }; + void test() { + X x; + x[[(123)]]; + } + )cpp"; + EXPECT_DECLS("CXXOperatorCallExpr", "void operator()(int n)"); } TEST_F(TargetDeclTest, UsingDecl) { diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index 8dedcf579fd33..8a54b552258c1 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -7,10 +7,14 @@ //===----------------------------------------------------------------------===// #include "Annotations.h" +#include "ClangdServer.h" +#include "SyncAPI.h" #include "TestFS.h" #include "TestTU.h" +#include "index/Ref.h" #include "refactor/Rename.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/Support/MemoryBuffer.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -18,8 +22,45 @@ namespace clang { namespace 
clangd {
 namespace {
 
-MATCHER_P2(RenameRange, Code, Range, "") {
-  return replacementToEdit(Code, arg).range == Range;
+using testing::Eq;
+using testing::Pair;
+using testing::UnorderedElementsAre;
+
+// Build a RefSlab from all marked ranges in the annotation. The ranges are
+// assumed to associate with the given SymbolName.
+std::unique_ptr<RefSlab> buildRefSlab(const Annotations &Code,
+                                      llvm::StringRef SymbolName,
+                                      llvm::StringRef Path) {
+  RefSlab::Builder Builder;
+  TestTU TU;
+  TU.HeaderCode = Code.code();
+  auto Symbols = TU.headerSymbols();
+  const auto &SymbolID = findSymbol(Symbols, SymbolName).ID;
+  for (const auto &Range : Code.ranges()) {
+    Ref R;
+    R.Kind = RefKind::Reference;
+    R.Location.Start.setLine(Range.start.line);
+    R.Location.Start.setColumn(Range.start.character);
+    R.Location.End.setLine(Range.end.line);
+    R.Location.End.setColumn(Range.end.character);
+    auto U = URI::create(Path).toString();
+    R.Location.FileURI = U.c_str();
+    Builder.insert(SymbolID, R);
+  }
+
+  return std::make_unique<RefSlab>(std::move(Builder).build());
+}
+
+std::vector<
+    std::pair</*FilePath*/ std::string, /*CodeAfterRename*/ std::string>>
+applyEdits(FileEdits FE) {
+  std::vector<std::pair<std::string, std::string>> Results;
+  for (auto &It : FE)
+    Results.emplace_back(
+        It.first().str(),
+        llvm::cantFail(tooling::applyAllReplacements(
+            It.getValue().InitialCode, It.getValue().Replacements)));
+  return Results;
 }
 
 // Generates an expected rename result by replacing all ranges in the given
@@ -363,11 +404,11 @@ TEST(RenameTest, WithinFileRename) {
     llvm::StringRef NewName = "abcde";
     for (const auto &RenamePos : Code.points()) {
       auto RenameResult =
-          renameWithinFile(AST, testPath(TU.Filename), RenamePos, NewName);
-      ASSERT_TRUE(bool(RenameResult)) << RenameResult.takeError() << T;
-      auto ApplyResult = llvm::cantFail(
-          tooling::applyAllReplacements(Code.code(), *RenameResult));
-      EXPECT_EQ(expectedResult(Code, NewName), ApplyResult);
+          rename({RenamePos, NewName, AST, testPath(TU.Filename)});
+      ASSERT_TRUE(bool(RenameResult)) << RenameResult.takeError();
+      ASSERT_EQ(1u, RenameResult->size());
+      EXPECT_EQ(applyEdits(std::move(*RenameResult)).front().second,
+                expectedResult(Code, NewName));
     }
   }
 }
@@ -411,13 +452,20 @@ TEST(RenameTest, Renameable) {
        )cpp",
        "used outside main file", HeaderFile, Index},
-      {R"cpp(// disallow -- symbol is not indexable.
+      {R"cpp(// disallow -- symbol in anonymous namespace in header is not indexable.
         namespace {
         class Unin^dexable {};
         }
       )cpp",
        "not eligible for indexing", HeaderFile, Index},
+      {R"cpp(// allow -- symbol in anonymous namespace in non-header file is indexable.
+        namespace {
+        class [[F^oo]] {};
+        }
+      )cpp",
+       nullptr, !HeaderFile, Index},
+
       {R"cpp(// disallow -- namespace symbol isn't supported
        namespace n^s {}
      )cpp",
       "not eligible for indexing", HeaderFile, Index},
 
@@ -480,23 +528,23 @@ TEST(RenameTest, Renameable) {
     }
     auto AST = TU.build();
     llvm::StringRef NewName = "dummyNewName";
-    auto Results = renameWithinFile(AST, testPath(TU.Filename), T.point(),
-                                    NewName, Case.Index);
+    auto Results =
+        rename({T.point(), NewName, AST, testPath(TU.Filename), Case.Index});
     bool WantRename = true;
     if (T.ranges().empty())
       WantRename = false;
     if (!WantRename) {
       assert(Case.ErrorMessage && "Error message must be set!");
       EXPECT_FALSE(Results)
-          << "expected renameWithinFile returned an error: " << T.code();
+          << "expected rename returned an error: " << T.code();
       auto ActualMessage = llvm::toString(Results.takeError());
       EXPECT_THAT(ActualMessage, testing::HasSubstr(Case.ErrorMessage));
     } else {
-      EXPECT_TRUE(bool(Results)) << "renameWithinFile returned an error: "
+      EXPECT_TRUE(bool(Results)) << "rename returned an error: "
                                  << llvm::toString(Results.takeError());
-      auto ApplyResult =
-          llvm::cantFail(tooling::applyAllReplacements(T.code(), *Results));
-      EXPECT_EQ(expectedResult(T, NewName), ApplyResult);
+      ASSERT_EQ(1u, Results->size());
+      EXPECT_EQ(applyEdits(std::move(*Results)).front().second,
+                expectedResult(T, NewName));
     }
   }
 }
@@ -522,11 +570,287 @@ TEST(RenameTest, MainFileReferencesOnly) {
   llvm::StringRef NewName = "abcde";
   auto RenameResult =
-      renameWithinFile(AST, testPath(TU.Filename), Code.point(), NewName);
+      rename({Code.point(), NewName, AST, testPath(TU.Filename)});
   ASSERT_TRUE(bool(RenameResult)) << RenameResult.takeError() << Code.point();
-  auto ApplyResult =
-      llvm::cantFail(tooling::applyAllReplacements(Code.code(), *RenameResult));
-  EXPECT_EQ(expectedResult(Code, NewName), ApplyResult);
+  ASSERT_EQ(1u, RenameResult->size());
+  EXPECT_EQ(applyEdits(std::move(*RenameResult)).front().second,
+            expectedResult(Code, NewName));
+}
+
+TEST(CrossFileRenameTests, DirtyBuffer) {
+  Annotations FooCode("class [[Foo]] {};");
+  std::string FooPath = testPath("foo.cc");
+  Annotations FooDirtyBuffer("class [[Foo]] {};\n// this is dirty buffer");
+  Annotations BarCode("void [[Bar]]() {}");
+  std::string BarPath = testPath("bar.cc");
+  // Build the index; it has "Foo" references from foo.cc and "Bar"
+  // references from bar.cc.
+  FileSymbols FSymbols;
+  FSymbols.update(FooPath, nullptr, buildRefSlab(FooCode, "Foo", FooPath),
+                  nullptr, false);
+  FSymbols.update(BarPath, nullptr, buildRefSlab(BarCode, "Bar", BarPath),
+                  nullptr, false);
+  auto Index = FSymbols.buildIndex(IndexType::Light);
+
+  Annotations MainCode("class [[Fo^o]] {};");
+  auto MainFilePath = testPath("main.cc");
+  // Dirty buffer for foo.cc.
+  auto GetDirtyBuffer = [&](PathRef Path) -> llvm::Optional<std::string> {
+    if (Path == FooPath)
+      return FooDirtyBuffer.code().str();
+    return llvm::None;
+  };
+
+  // Run rename on Foo; there is a dirty buffer for foo.cc, and rename should
+  // respect it.
+  TestTU TU = TestTU::withCode(MainCode.code());
+  auto AST = TU.build();
+  llvm::StringRef NewName = "newName";
+  auto Results = rename({MainCode.point(), NewName, AST, MainFilePath,
+                         Index.get(), /*CrossFile=*/true, GetDirtyBuffer});
+  ASSERT_TRUE(bool(Results)) << Results.takeError();
+  EXPECT_THAT(
+      applyEdits(std::move(*Results)),
+      UnorderedElementsAre(
+          Pair(Eq(FooPath), Eq(expectedResult(FooDirtyBuffer, NewName))),
+          Pair(Eq(MainFilePath), Eq(expectedResult(MainCode, NewName)))));
+
+  // Run rename on Bar; there is no dirty buffer for the affected file bar.cc,
+  // so we should read the file content from the VFS.
+  MainCode = Annotations("void [[Bar]]() { [[B^ar]](); }");
+  TU = TestTU::withCode(MainCode.code());
+  // Set a file "bar.cc" on disk.
+  TU.AdditionalFiles["bar.cc"] = BarCode.code();
+  AST = TU.build();
+  Results = rename({MainCode.point(), NewName, AST, MainFilePath, Index.get(),
+                    /*CrossFile=*/true, GetDirtyBuffer});
+  ASSERT_TRUE(bool(Results)) << Results.takeError();
+  EXPECT_THAT(
+      applyEdits(std::move(*Results)),
+      UnorderedElementsAre(
+          Pair(Eq(BarPath), Eq(expectedResult(BarCode, NewName))),
+          Pair(Eq(MainFilePath), Eq(expectedResult(MainCode, NewName)))));
+
+  // Run rename against a paginated index which couldn't return all refs in
+  // one request; we reject rename in this case.
+  class PaginationIndex : public SymbolIndex {
+    bool refs(const RefsRequest &Req,
+              llvm::function_ref<void(const Ref &)> Callback) const override {
+      return true; // has more references
+    }
+
+    bool fuzzyFind(
+        const FuzzyFindRequest &Req,
+        llvm::function_ref<void(const Symbol &)> Callback) const override {
+      return false;
+    }
+    void
+    lookup(const LookupRequest &Req,
+           llvm::function_ref<void(const Symbol &)> Callback) const override {}
+
+    void relations(const RelationsRequest &Req,
+                   llvm::function_ref<void(const SymbolID &, const Symbol &)>
+                       Callback) const override {}
+    size_t estimateMemoryUsage() const override { return 0; }
+  } PIndex;
+  Results = rename({MainCode.point(), NewName, AST, MainFilePath, &PIndex,
+                    /*CrossFile=*/true, GetDirtyBuffer});
+  EXPECT_FALSE(Results);
+  EXPECT_THAT(llvm::toString(Results.takeError()),
+              testing::HasSubstr("too many occurrences"));
+}
+
+TEST(CrossFileRenameTests, WithUpToDateIndex) {
+  MockCompilationDatabase CDB;
+  CDB.ExtraClangFlags = {"-xc++"};
+  class IgnoreDiagnostics : public DiagnosticsConsumer {
+    void onDiagnosticsReady(PathRef File,
+                            std::vector<Diag> Diagnostics) override {}
+  } DiagConsumer;
+  // rename is running on the "^" point in FooH, and "[[]]" ranges are the
+  // expected rename occurrences.
+  struct Case {
+    llvm::StringRef FooH;
+    llvm::StringRef FooCC;
+  } Cases[] = {
+      {
+          // classes.
+          R"cpp(
+        class [[Fo^o]] {
+          [[Foo]]();
+          ~[[Foo]]();
+        };
+      )cpp",
+          R"cpp(
+        #include "foo.h"
+        [[Foo]]::[[Foo]]() {}
+        [[Foo]]::~[[Foo]]() {}
+
+        void func() {
+          [[Foo]] foo;
+        }
+      )cpp",
+      },
+      {
+          // class methods.
+          R"cpp(
+        class Foo {
+          void [[f^oo]]();
+        };
+      )cpp",
+          R"cpp(
+        #include "foo.h"
+        void Foo::[[foo]]() {}
+
+        void func(Foo* p) {
+          p->[[foo]]();
+        }
+      )cpp",
+      },
+      {
+          // functions.
+          R"cpp(
+        void [[f^oo]]();
+      )cpp",
+          R"cpp(
+        #include "foo.h"
+        void [[foo]]() {}
+
+        void func() {
+          [[foo]]();
+        }
+      )cpp",
+      },
+      {
+          // typedefs.
+          R"cpp(
+        typedef int [[IN^T]];
+        [[INT]] foo();
+      )cpp",
+          R"cpp(
+        #include "foo.h"
+        [[INT]] foo() {}
+      )cpp",
+      },
+      {
+          // usings.
+          R"cpp(
+        using [[I^NT]] = int;
+        [[INT]] foo();
+      )cpp",
+          R"cpp(
+        #include "foo.h"
+        [[INT]] foo() {}
+      )cpp",
+      },
+      {
+          // variables.
+          R"cpp(
+        static const int [[VA^R]] = 123;
+      )cpp",
+          R"cpp(
+        #include "foo.h"
+        int s = [[VAR]];
+      )cpp",
+      },
+      {
+          // scope enums.
+          R"cpp(
+        enum class [[K^ind]] { ABC };
+      )cpp",
+          R"cpp(
+        #include "foo.h"
+        [[Kind]] ff() {
+          return [[Kind]]::ABC;
+        }
+      )cpp",
+      },
+      {
+          // enum constants.
+          R"cpp(
+        enum class Kind { [[A^BC]] };
+      )cpp",
+          R"cpp(
+        #include "foo.h"
+        Kind ff() {
+          return Kind::[[ABC]];
+        }
+      )cpp",
+      },
+  };
+
+  for (const auto &T : Cases) {
+    Annotations FooH(T.FooH);
+    Annotations FooCC(T.FooCC);
+    std::string FooHPath = testPath("foo.h");
+    std::string FooCCPath = testPath("foo.cc");
+
+    MockFSProvider FS;
+    FS.Files[FooHPath] = FooH.code();
+    FS.Files[FooCCPath] = FooCC.code();
+
+    auto ServerOpts = ClangdServer::optsForTest();
+    ServerOpts.CrossFileRename = true;
+    ServerOpts.BuildDynamicSymbolIndex = true;
+    ClangdServer Server(CDB, FS, DiagConsumer, ServerOpts);
+
+    // Add all files to the clangd server to make sure the dynamic index has
+    // been built.
+    runAddDocument(Server, FooHPath, FooH.code());
+    runAddDocument(Server, FooCCPath, FooCC.code());
+
+    llvm::StringRef NewName = "NewName";
+    auto FileEditsList =
+        llvm::cantFail(runRename(Server, FooHPath, FooH.point(), NewName));
+    EXPECT_THAT(applyEdits(std::move(FileEditsList)),
+                UnorderedElementsAre(
+                    Pair(Eq(FooHPath), Eq(expectedResult(T.FooH, NewName))),
+                    Pair(Eq(FooCCPath), Eq(expectedResult(T.FooCC, NewName)))));
+  }
+}
+
+TEST(CrossFileRenameTests, CrossFileOnLocalSymbol) {
+  // Cross-file rename should work for function-local symbols, even when there
+  // is no index provided.
+  Annotations Code("void f(int [[abc]]) { [[a^bc]] = 3; }");
+  auto TU = TestTU::withCode(Code.code());
+  auto Path = testPath(TU.Filename);
+  auto AST = TU.build();
+  llvm::StringRef NewName = "newName";
+  auto Results = rename({Code.point(), NewName, AST, Path});
+  ASSERT_TRUE(bool(Results)) << Results.takeError();
+  EXPECT_THAT(
+      applyEdits(std::move(*Results)),
+      UnorderedElementsAre(Pair(Eq(Path), Eq(expectedResult(Code, NewName)))));
+}
+
+TEST(CrossFileRenameTests, BuildRenameEdits) {
+  Annotations Code("[[😂]]");
+  auto LSPRange = Code.range();
+  llvm::StringRef FilePath = "/test/TestTU.cpp";
+  auto Edit = buildRenameEdit(FilePath, Code.code(), {LSPRange}, "abc");
+  ASSERT_TRUE(bool(Edit)) << Edit.takeError();
+  ASSERT_EQ(1UL, Edit->Replacements.size());
+  EXPECT_EQ(FilePath, Edit->Replacements.begin()->getFilePath());
+  EXPECT_EQ(4UL, Edit->Replacements.begin()->getLength());
+
+  // Test invalid range.
+  LSPRange.end = {10, 0}; // out of range
+  Edit = buildRenameEdit(FilePath, Code.code(), {LSPRange}, "abc");
+  EXPECT_FALSE(Edit);
+  EXPECT_THAT(llvm::toString(Edit.takeError()),
+              testing::HasSubstr("fail to convert"));
+
+  // Normal ASCII characters.
+ Annotations T(R"cpp( + [[range]] + [[range]] + [[range]] + )cpp"); + Edit = buildRenameEdit(FilePath, T.code(), T.ranges(), "abc"); + ASSERT_TRUE(bool(Edit)) << Edit.takeError(); + EXPECT_EQ(applyEdits(FileEdits{{T.code(), std::move(*Edit)}}).front().second, + expectedResult(Code, expectedResult(T, "abc"))); } } // namespace diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp index 2803aaaca1c57..9e1a90b55e3ac 100644 --- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp @@ -40,7 +40,7 @@ Range nodeRange(const SelectionTree::Node *N, ParsedAST &AST) { if (!N) return Range{}; const SourceManager &SM = AST.getSourceManager(); - const LangOptions &LangOpts = AST.getASTContext().getLangOpts(); + const LangOptions &LangOpts = AST.getLangOpts(); StringRef Buffer = SM.getBufferData(SM.getMainFileID()); if (llvm::isa_and_nonnull(N->ASTNode.get())) return Range{Position{}, offsetToPosition(Buffer, Buffer.size())}; @@ -134,6 +134,15 @@ TEST(SelectionTest, CommonAncestor) { )cpp", "IfStmt", }, + { + R"cpp( + int x(int); + #define M(foo) x(foo) + int a = 42; + int b = M([[^a]]); + )cpp", + "DeclRefExpr", + }, { R"cpp( void foo(); @@ -234,6 +243,7 @@ TEST(SelectionTest, CommonAncestor) { {"void foo() { [[foo^()]]; }", "CallExpr"}, {"void foo() { [[foo^]] (); }", "DeclRefExpr"}, {"int bar; void foo() [[{ foo (); }]]^", "CompoundStmt"}, + {"int x = [[42]]^;", "IntegerLiteral"}, // Ignores whitespace, comments, and semicolons in the selection. {"void foo() { [[foo^()]]; /*comment*/^}", "CallExpr"}, @@ -271,7 +281,6 @@ TEST(SelectionTest, CommonAncestor) { // FIXME: Ideally we'd get a declstmt or the VarDecl itself here. // This doesn't happen now; the RAV doesn't traverse a node containing ;. {"int x = 42;^", nullptr}, - {"int x = 42^;", nullptr}, // Common ancestor is logically TUDecl, but we never return that. {"^int x; int y;^", nullptr}, @@ -378,6 +387,7 @@ TEST(SelectionTest, Selected) { $C[[return]]; }]] else [[{^ }]]]] + char z; } )cpp", R"cpp( @@ -386,10 +396,10 @@ TEST(SelectionTest, Selected) { void foo(^$C[[unique_ptr<$C[[unique_ptr<$C[[int]]>]]>]]^ a) {} )cpp", R"cpp(int a = [[5 >^> 1]];)cpp", - R"cpp([[ + R"cpp( #define ECHO(X) X - ECHO(EC^HO([[$C[[int]]) EC^HO(a]])); - ]])cpp", + ECHO(EC^HO($C[[int]]) EC^HO(a)); + )cpp", R"cpp( $C[[^$C[[int]] a^]]; )cpp", R"cpp( $C[[^$C[[int]] a = $C[[5]]^]]; )cpp", }; @@ -428,6 +438,56 @@ TEST(SelectionTest, PathologicalPreprocessor) { EXPECT_EQ("WhileStmt", T.commonAncestor()->Parent->kind()); } +TEST(SelectionTest, IncludedFile) { + const char *Case = R"cpp( + void test() { +#include "Exp^and.inc" + break; + } + )cpp"; + Annotations Test(Case); + auto TU = TestTU::withCode(Test.code()); + TU.AdditionalFiles["Expand.inc"] = "while(1)\n"; + auto AST = TU.build(); + auto T = makeSelectionTree(Case, AST); + + EXPECT_EQ("WhileStmt", T.commonAncestor()->kind()); +} + +TEST(SelectionTest, MacroArgExpansion) { + // If a macro arg is expanded several times, we consider them all selected. + const char *Case = R"cpp( + int mul(int, int); + #define SQUARE(X) mul(X, X); + int nine = SQUARE(^3); + )cpp"; + Annotations Test(Case); + auto AST = TestTU::withCode(Test.code()).build(); + auto T = makeSelectionTree(Case, AST); + // Unfortunately, this makes the common ancestor the CallExpr... + // FIXME: hack around this by picking one? 
+ EXPECT_EQ("CallExpr", T.commonAncestor()->kind()); + EXPECT_FALSE(T.commonAncestor()->Selected); + EXPECT_EQ(2u, T.commonAncestor()->Children.size()); + for (const auto* N : T.commonAncestor()->Children) { + EXPECT_EQ("IntegerLiteral", N->kind()); + EXPECT_TRUE(N->Selected); + } + + // Verify that the common assert() macro doesn't suffer from this. + // (This is because we don't associate the stringified token with the arg). + Case = R"cpp( + void die(const char*); + #define assert(x) (x ? (void)0 : die(#x) + void foo() { assert(^42); } + )cpp"; + Test = Annotations(Case); + AST = TestTU::withCode(Test.code()).build(); + T = makeSelectionTree(Case, AST); + + EXPECT_EQ("IntegerLiteral", T.commonAncestor()->kind()); +} + TEST(SelectionTest, Implicit) { const char* Test = R"cpp( struct S { S(const char*); }; diff --git a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp index b9ca0273a8233..f518fea672920 100644 --- a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp @@ -88,11 +88,8 @@ TEST(SemanticSelection, All) { R"cpp( // Single statement in TU. [[int v = [[1^00]]]]; )cpp", - // FIXME: No node found associated to the position. R"cpp( // Cursor at end of VarDecl. - void func() { - int v = 100 + 100^; - } + [[int v = [[100]]^]]; )cpp", // FIXME: No node found associated to the position. R"cpp( // Cursor in between spaces. diff --git a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp index 0dabce2a3d64d..5979261600bbf 100644 --- a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp +++ b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp @@ -358,7 +358,7 @@ Bar* bar; auto AST = TestTU::withCode(TestCase.code()).build(); const auto &SourceMgr = AST.getSourceManager(); SourceLocation Actual = getBeginningOfIdentifier( - TestCase.points().back(), SourceMgr, AST.getASTContext().getLangOpts()); + TestCase.points().back(), SourceMgr, AST.getLangOpts()); Position ActualPos = offsetToPosition( TestCase.code(), SourceMgr.getFileOffset(SourceMgr.getSpellingLoc(Actual))); @@ -482,7 +482,7 @@ TEST(SourceCodeTests, GetMacros) { TestTU TU = TestTU::withCode(Code.code()); auto AST = TU.build(); auto Loc = getBeginningOfIdentifier(Code.point(), AST.getSourceManager(), - AST.getASTContext().getLangOpts()); + AST.getLangOpts()); auto Result = locateMacroAt(Loc, AST.getPreprocessor()); ASSERT_TRUE(Result); EXPECT_THAT(*Result, MacroName("MACRO")); @@ -548,7 +548,7 @@ TEST(SourceCodeTests, HalfOpenFileRange) { ParsedAST AST = TestTU::withCode(Test.code()).build(); llvm::errs() << Test.code(); const SourceManager &SM = AST.getSourceManager(); - const LangOptions &LangOpts = AST.getASTContext().getLangOpts(); + const LangOptions &LangOpts = AST.getLangOpts(); // Turn a SourceLocation into a pair of positions auto SourceRangeToRange = [&SM](SourceRange SrcRange) { return Range{sourceLocToPosition(SM, SrcRange.getBegin()), @@ -588,8 +588,7 @@ TEST(SourceCodeTests, HalfOpenFileRangePathologicalPreprocessor) { const auto &Body = cast(Func.getBody()); const auto &Loop = cast(*Body->child_begin()); llvm::Optional Range = toHalfOpenFileRange( - AST.getSourceManager(), AST.getASTContext().getLangOpts(), - Loop->getSourceRange()); + AST.getSourceManager(), AST.getLangOpts(), Loop->getSourceRange()); ASSERT_TRUE(Range) << "Failed to get file range"; 
EXPECT_EQ(AST.getSourceManager().getFileOffset(Range->getBegin()), Test.llvm::Annotations::range().Begin); diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp index d737862fa0465..abc7aa389bd54 100644 --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -39,6 +39,7 @@ using ::testing::Contains; using ::testing::Each; using ::testing::ElementsAre; using ::testing::Field; +using ::testing::IsEmpty; using ::testing::Not; using ::testing::Pair; using ::testing::UnorderedElementsAre; @@ -214,7 +215,8 @@ class SymbolIndexActionFactory : public tooling::FrontendActionFactory { CreateASTConsumer(CompilerInstance &CI, llvm::StringRef InFile) override { if (PragmaHandler) CI.getPreprocessor().addCommentHandler(PragmaHandler); - return createIndexingASTConsumer(DataConsumer, Opts, CI.getPreprocessorPtr()); + return createIndexingASTConsumer(DataConsumer, Opts, + CI.getPreprocessorPtr()); } bool BeginInvocation(CompilerInstance &CI) override { @@ -577,15 +579,16 @@ o]](); TEST_F(SymbolCollectorTest, Refs) { Annotations Header(R"( - class $foo[[Foo]] { + #define MACRO(X) (X + 1) + class Foo { public: - $foo[[Foo]]() {} - $foo[[Foo]](int); + Foo() {} + Foo(int); }; - class $bar[[Bar]]; - void $func[[func]](); + class Bar; + void func(); - namespace $ns[[NS]] {} // namespace ref is ignored + namespace NS {} // namespace ref is ignored )"); Annotations Main(R"( class $bar[[Bar]] {}; @@ -598,19 +601,20 @@ TEST_F(SymbolCollectorTest, Refs) { $func[[func]](); int abc = 0; $foo[[Foo]] foo2 = abc; + abc = $macro[[MACRO]](1); } )"); Annotations SymbolsOnlyInMainCode(R"( + #define FUNC(X) (X+1) int a; void b() {} - static const int c = 0; + static const int c = FUNC(1); class d {}; )"); CollectorOpts.RefFilter = RefKind::All; + CollectorOpts.CollectMacro = true; runSymbolCollector(Header.code(), (Main.code() + SymbolsOnlyInMainCode.code()).str()); - auto HeaderSymbols = TestTU::withHeaderCode(Header.code()).headerSymbols(); - EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "Foo").ID, HaveRanges(Main.ranges("foo"))))); EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "Bar").ID, @@ -618,12 +622,82 @@ TEST_F(SymbolCollectorTest, Refs) { EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "func").ID, HaveRanges(Main.ranges("func"))))); EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(Symbols, "NS").ID, _)))); - // Symbols *only* in the main file (a, b, c) had no refs collected. + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "MACRO").ID, + HaveRanges(Main.ranges("macro"))))); + // Symbols *only* in the main file (a, b, c, FUNC) had no refs collected. auto MainSymbols = TestTU::withHeaderCode(SymbolsOnlyInMainCode.code()).headerSymbols(); EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(MainSymbols, "a").ID, _)))); EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(MainSymbols, "b").ID, _)))); EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(MainSymbols, "c").ID, _)))); + EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(MainSymbols, "FUNC").ID, _)))); +} + +TEST_F(SymbolCollectorTest, MacroRefInHeader) { + Annotations Header(R"( + #define $foo[[FOO]](X) (X + 1) + #define $bar[[BAR]](X) (X + 2) + + // Macro defined multiple times. + #define $ud1[[UD]] 1 + int ud_1 = $ud1[[UD]]; + #undef UD + + #define $ud2[[UD]] 2 + int ud_2 = $ud2[[UD]]; + #undef UD + + // Macros from token concatenations not included. 
+ #define $concat[[CONCAT]](X) X##A() + #define $prepend[[PREPEND]](X) MACRO##X() + #define $macroa[[MACROA]]() 123 + int B = $concat[[CONCAT]](MACRO); + int D = $prepend[[PREPEND]](A); + + void fff() { + int abc = $foo[[FOO]](1) + $bar[[BAR]]($foo[[FOO]](1)); + } + )"); + CollectorOpts.RefFilter = RefKind::All; + CollectorOpts.RefsInHeaders = true; + // Need this to get the SymbolID for macros for tests. + CollectorOpts.CollectMacro = true; + + runSymbolCollector(Header.code(), ""); + + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "FOO").ID, + HaveRanges(Header.ranges("foo"))))); + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "BAR").ID, + HaveRanges(Header.ranges("bar"))))); + // No unique ID for multiple symbols named UD. Check for ranges only. + EXPECT_THAT(Refs, Contains(Pair(_, HaveRanges(Header.ranges("ud1"))))); + EXPECT_THAT(Refs, Contains(Pair(_, HaveRanges(Header.ranges("ud2"))))); + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "CONCAT").ID, + HaveRanges(Header.ranges("concat"))))); + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "PREPEND").ID, + HaveRanges(Header.ranges("prepend"))))); + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "MACROA").ID, + HaveRanges(Header.ranges("macroa"))))); +} + +TEST_F(SymbolCollectorTest, MacroRefWithoutCollectingSymbol) { + Annotations Header(R"( + #define $foo[[FOO]](X) (X + 1) + int abc = $foo[[FOO]](1); + )"); + CollectorOpts.RefFilter = RefKind::All; + CollectorOpts.RefsInHeaders = true; + CollectorOpts.CollectMacro = false; + runSymbolCollector(Header.code(), ""); + EXPECT_THAT(Refs, Contains(Pair(_, HaveRanges(Header.ranges("foo"))))); +} + +TEST_F(SymbolCollectorTest, MacrosWithRefFilter) { + Annotations Header("#define $macro[[MACRO]](X) (X + 1)"); + Annotations Main("void foo() { int x = $macro[[MACRO]](1); }"); + CollectorOpts.RefFilter = RefKind::Unknown; + runSymbolCollector(Header.code(), Main.code()); + EXPECT_THAT(Refs, IsEmpty()); } TEST_F(SymbolCollectorTest, NameReferences) { @@ -675,21 +749,26 @@ TEST_F(SymbolCollectorTest, HeaderAsMainFile) { TestFileName = testPath("foo.hh"); runSymbolCollector("", Header.code()); EXPECT_THAT(Symbols, UnorderedElementsAre(QName("Foo"), QName("Func"))); - EXPECT_THAT(Refs, UnorderedElementsAre(Pair(findSymbol(Symbols, "Foo").ID, - HaveRanges(Header.ranges("Foo"))), - Pair(findSymbol(Symbols, "Func").ID, - HaveRanges(Header.ranges("Func"))))); + EXPECT_THAT(Refs, + UnorderedElementsAre(Pair(findSymbol(Symbols, "Foo").ID, + HaveRanges(Header.ranges("Foo"))), + Pair(findSymbol(Symbols, "Func").ID, + HaveRanges(Header.ranges("Func"))))); } TEST_F(SymbolCollectorTest, RefsInHeaders) { CollectorOpts.RefFilter = RefKind::All; CollectorOpts.RefsInHeaders = true; + CollectorOpts.CollectMacro = true; Annotations Header(R"( - class [[Foo]] {}; + #define $macro[[MACRO]](x) (x+1) + class $foo[[Foo]] {}; )"); runSymbolCollector(Header.code(), ""); EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "Foo").ID, - HaveRanges(Header.ranges())))); + HaveRanges(Header.ranges("foo"))))); + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "MACRO").ID, + HaveRanges(Header.ranges("macro"))))); } TEST_F(SymbolCollectorTest, Relations) { @@ -704,7 +783,7 @@ TEST_F(SymbolCollectorTest, Relations) { Contains(Relation{Base.ID, RelationKind::BaseOf, Derived.ID})); } -TEST_F(SymbolCollectorTest, References) { +TEST_F(SymbolCollectorTest, CountReferences) { const std::string Header = R"( class W; class X {}; diff --git a/clang-tools-extra/clangd/unittests/SyncAPI.cpp 
b/clang-tools-extra/clangd/unittests/SyncAPI.cpp
index 812fa7a0f2ecb..5c7949ab41baf 100644
--- a/clang-tools-extra/clangd/unittests/SyncAPI.cpp
+++ b/clang-tools-extra/clangd/unittests/SyncAPI.cpp
@@ -96,11 +96,10 @@ runFindDocumentHighlights(ClangdServer &Server, PathRef File, Position Pos) {
   return std::move(*Result);
 }
 
-llvm::Expected<std::vector<tooling::Replacement>> runRename(ClangdServer &Server,
-                                                            PathRef File, Position Pos,
-                                                            llvm::StringRef NewName) {
-  llvm::Optional<llvm::Expected<std::vector<tooling::Replacement>>> Result;
-  Server.rename(File, Pos, NewName, /*WantFormat=*/true, capture(Result));
+llvm::Expected<FileEdits> runRename(ClangdServer &Server, PathRef File,
+                                    Position Pos, llvm::StringRef NewName) {
+  llvm::Optional<llvm::Expected<FileEdits>> Result;
+  Server.rename(File, Pos, NewName, /*WantFormat=*/false, capture(Result));
   return std::move(*Result);
 }
 
diff --git a/clang-tools-extra/clangd/unittests/SyncAPI.h b/clang-tools-extra/clangd/unittests/SyncAPI.h
index 5ffed1fbb120c..55a538ef6a977 100644
--- a/clang-tools-extra/clangd/unittests/SyncAPI.h
+++ b/clang-tools-extra/clangd/unittests/SyncAPI.h
@@ -38,8 +38,8 @@ runLocateSymbolAt(ClangdServer &Server, PathRef File, Position Pos);
 llvm::Expected<std::vector<DocumentHighlight>>
 runFindDocumentHighlights(ClangdServer &Server, PathRef File, Position Pos);
 
-llvm::Expected<std::vector<tooling::Replacement>>
-runRename(ClangdServer &Server, PathRef File, Position Pos, StringRef NewName);
+llvm::Expected<FileEdits> runRename(ClangdServer &Server, PathRef File,
+                                    Position Pos, StringRef NewName);
 
 std::string runDumpAST(ClangdServer &Server, PathRef File);
 
diff --git a/clang-tools-extra/clangd/unittests/TweakTesting.cpp b/clang-tools-extra/clangd/unittests/TweakTesting.cpp
index 3331a3d937155..7f9f75c081987 100644
--- a/clang-tools-extra/clangd/unittests/TweakTesting.cpp
+++ b/clang-tools-extra/clangd/unittests/TweakTesting.cpp
@@ -63,12 +63,14 @@ std::pair<unsigned, unsigned> rangeOrPoint(const Annotations &A) {
           cantFail(positionToOffset(A.code(), SelectionRng.end))};
 }
 
-MATCHER_P6(TweakIsAvailable, TweakID, Ctx, Header, ExtraArgs, ExtraFiles, Index,
+MATCHER_P7(TweakIsAvailable, TweakID, Ctx, Header, ExtraArgs, ExtraFiles, Index,
+           FileName,
           (TweakID + (negation ? " is unavailable" : " is available")).str()) {
   std::string WrappedCode = wrap(Ctx, arg);
   Annotations Input(WrappedCode);
   auto Selection = rangeOrPoint(Input);
   TestTU TU;
+  TU.Filename = FileName;
   TU.HeaderCode = Header;
   TU.Code = Input.code();
   TU.ExtraArgs = ExtraArgs;
@@ -91,6 +93,7 @@ std::string TweakTest::apply(llvm::StringRef MarkedCode,
   auto Selection = rangeOrPoint(Input);
   TestTU TU;
+  TU.Filename = FileName;
   TU.HeaderCode = Header;
   TU.AdditionalFiles = std::move(ExtraFiles);
   TU.Code = Input.code();
@@ -124,7 +127,7 @@ std::string TweakTest::apply(llvm::StringRef MarkedCode,
         ADD_FAILURE() << "There were changes to additional files, but client "
                          "provided a nullptr for EditedFiles.";
       else
-        EditedFiles->try_emplace(It.first(), Unwrapped.str());
+        EditedFiles->insert_or_assign(It.first(), Unwrapped.str());
     }
   }
   return EditedMainFile;
@@ -132,7 +135,7 @@ std::string TweakTest::apply(llvm::StringRef MarkedCode,
 
 ::testing::Matcher<llvm::StringRef> TweakTest::isAvailable() const {
   return TweakIsAvailable(llvm::StringRef(TweakID), Context, Header, ExtraArgs,
-                          ExtraFiles, Index.get());
+                          ExtraFiles, Index.get(), FileName);
 }
 
 std::vector<std::string> TweakTest::expandCases(llvm::StringRef MarkedCode) {
diff --git a/clang-tools-extra/clangd/unittests/TweakTesting.h b/clang-tools-extra/clangd/unittests/TweakTesting.h
index ffcf5a0c7ea2a..10186f859bae2 100644
--- a/clang-tools-extra/clangd/unittests/TweakTesting.h
+++ b/clang-tools-extra/clangd/unittests/TweakTesting.h
@@ -12,6 +12,7 @@
 #include "TestTU.h"
 #include "index/Index.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <string>
@@ -62,6 +63,8 @@ class TweakTest : public ::testing::Test {
   // testcases.
   std::string Header;
 
+  llvm::StringRef FileName = "TestTU.cpp";
+
   // Extra flags passed to the compilation in apply().
   std::vector<std::string> ExtraArgs;
 
diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp
index 4e481241acd8c..f45866a52bd53 100644
--- a/clang-tools-extra/clangd/unittests/TweakTests.cpp
+++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp
@@ -122,6 +122,25 @@ literal)")cpp";
   EXPECT_EQ(apply(Input), Output);
 }
 
+TWEAK_TEST(ObjCLocalizeStringLiteral);
+TEST_F(ObjCLocalizeStringLiteralTest, Test) {
+  ExtraArgs.push_back("-x");
+  ExtraArgs.push_back("objective-c");
+
+  // Ensure that the action can be initiated in the string literal.
+  EXPECT_AVAILABLE(R"(id x = ^[[@[[^"^t^est^"]]]];)");
+
+  // Ensure that the action can't be initiated in other places.
+  EXPECT_UNAVAILABLE(R"([[i^d ^[[x]] ^= @"test";^]])");
+
+  // Ensure that the action is not available for regular C strings.
+  EXPECT_UNAVAILABLE(R"(const char * x= "^test";)");
+
+  const char *Input = R"(id x = [[@"test"]];)";
+  const char *Output = R"(id x = NSLocalizedString(@"test", @"");)";
+  EXPECT_EQ(apply(Input), Output);
+}
+
 TWEAK_TEST(DumpAST);
 TEST_F(DumpASTTest, Test) {
   EXPECT_AVAILABLE("^int f^oo() { re^turn 2 ^+ 2; }");
@@ -269,7 +288,7 @@ TEST_F(ExtractVariableTest, Test) {
   EXPECT_UNAVAILABLE(UnavailableCases);
 
   // vector of pairs of input and output strings
-  const std::vector>
+  const std::vector>
       InputOutputs = {
           // extraction from variable declaration/assignment
          {R"cpp(void varDecl() {
@@ -321,17 +340,10 @@ TEST_F(ExtractVariableTest, Test) {
                    if(1)
                     LOOP(5 + [[3]])
                  })cpp",
-          /*FIXME: It should be extracted like this. SelectionTree needs to be
-           * fixed for macros.
           R"cpp(#define LOOP(x) while (1) {a = x;}
-          void f(int a) {
-            auto dummy = 3; if(1)
-             LOOP(5 + dummy)
-          })cpp"},*/
-          R"cpp(#define LOOP(x) while (1) {a = x;}
           void f(int a) {
-            auto dummy = LOOP(5 + 3); if(1)
-             dummy
+            auto dummy = 3; if(1)
+             LOOP(5 + dummy)
           })cpp"},
          {R"cpp(#define LOOP(x) do {x;} while(1);
           void f(int a) {
@@ -644,13 +656,18 @@ void f(const int c) {
   )cpp";
   EXPECT_EQ(apply(TemplateFailInput), "unavailable");
 
-  // FIXME: This should be extractable after selectionTree works correctly for
-  // macros (currently it doesn't select anything for the following case)
-  std::string MacroFailInput = R"cpp(
+  std::string MacroInput = R"cpp(
     #define F(BODY) void f() { BODY }
     F ([[int x = 0;]])
   )cpp";
-  EXPECT_EQ(apply(MacroFailInput), "unavailable");
+  std::string MacroOutput = R"cpp(
+    #define F(BODY) void f() { BODY }
+    void extracted() {
+int x = 0;
+}
+F (extracted();)
+  )cpp";
+  EXPECT_EQ(apply(MacroInput), MacroOutput);
 
   // Shouldn't crash.
   EXPECT_EQ(apply("void f([[int a]]);"), "unavailable");
@@ -1809,6 +1826,276 @@ TEST_F(DefineInlineTest, QualifyWithUsingDirectives) {
   EXPECT_EQ(apply(Test), Expected) << Test;
 }
 
+TWEAK_TEST(DefineOutline);
+TEST_F(DefineOutlineTest, TriggersOnFunctionDecl) {
+  FileName = "Test.cpp";
+  // Not available unless in a header file.
+  EXPECT_UNAVAILABLE(R"cpp(
+    [[void [[f^o^o]]() [[{
+      return;
+    }]]]])cpp");
+
+  FileName = "Test.hpp";
+  // Not available unless function name or full body is selected.
+  EXPECT_UNAVAILABLE(R"cpp(
+    // Not a definition
+    vo^i[[d^ ^f]]^oo();
+
+    [[vo^id ]]foo[[()]] {[[
+      [[(void)(5+3);
+      return;]]
+    }]])cpp");
+
+  // Available even if there are no implementation files.
+  EXPECT_AVAILABLE(R"cpp(
+    [[void [[f^o^o]]() [[{
+      return;
+    }]]]])cpp");
+
+  // Not available for out-of-line methods.
+  EXPECT_UNAVAILABLE(R"cpp(
+    class Bar {
+      void baz();
+    };
+
+    [[void [[Bar::[[b^a^z]]]]() [[{
+      return;
+    }]]]])cpp");
+
+  // Basic check for function body and signature.
+  EXPECT_AVAILABLE(R"cpp(
+    class Bar {
+      [[void [[f^o^o]]() [[{ return; }]]]]
+    };
+
+    void foo();
+    [[void [[f^o^o]]() [[{
+      return;
+    }]]]])cpp");
+
+  // Not available on defaulted/deleted members.
+  EXPECT_UNAVAILABLE(R"cpp(
+    class Foo {
+      Fo^o() = default;
+      F^oo(const Foo&) = delete;
+    };)cpp");
+
+  // Not available within templated classes, as it is hard to spell the class
+  // name out-of-line in such cases.
+  EXPECT_UNAVAILABLE(R"cpp(
+    template <typename T> struct Foo { void fo^o(){} };
+    })cpp");
+}
+
+TEST_F(DefineOutlineTest, FailsWithoutSource) {
+  FileName = "Test.hpp";
+  llvm::StringRef Test = "void fo^o() { return; }";
+  llvm::StringRef Expected =
+      "fail: Couldn't find a suitable implementation file.";
+  EXPECT_EQ(apply(Test), Expected);
+}
+
+TEST_F(DefineOutlineTest, ApplyTest) {
+  llvm::StringMap<std::string> EditedFiles;
+  ExtraFiles["Test.cpp"] = "";
+  FileName = "Test.hpp";
+  // Template body is not parsed until instantiation time on Windows, which
+  // results in arbitrary failures as function body becomes NULL.
+  ExtraArgs.push_back("-fno-delayed-template-parsing");
+
+  struct {
+    llvm::StringRef Test;
+    llvm::StringRef ExpectedHeader;
+    llvm::StringRef ExpectedSource;
+  } Cases[] = {
+      // Simple check
+      {
+          "void fo^o() { return; }",
+          "void foo() ;",
+          "void foo() { return; }",
+      },
+      // Templated function.
+      {
+          "template <typename T> void fo^o(T, T x) { return; }",
+          "template <typename T> void foo(T, T x) ;",
+          "template <typename T> void foo(T, T x) { return; }",
+      },
+      {
+          "template <typename T> void fo^o() { return; }",
+          "template <typename T> void foo() ;",
+          "template <typename T> void foo() { return; }",
+      },
+      // Template specialization.
+      {
+          R"cpp(
+            template <typename T> void foo();
+            template <> void fo^o<int>() { return; })cpp",
+          R"cpp(
+            template <typename T> void foo();
+            template <> void foo<int>() ;)cpp",
+          "template <> void foo<int>() { return; }",
+      },
+  };
+  for (const auto &Case : Cases) {
+    SCOPED_TRACE(Case.Test);
+    EXPECT_EQ(apply(Case.Test, &EditedFiles), Case.ExpectedHeader);
+    EXPECT_THAT(EditedFiles, testing::ElementsAre(FileWithContents(
+                                 testPath("Test.cpp"), Case.ExpectedSource)));
+  }
+}
+
+TEST_F(DefineOutlineTest, HandleMacros) {
+  llvm::StringMap<std::string> EditedFiles;
+  ExtraFiles["Test.cpp"] = "";
+  FileName = "Test.hpp";
+
+  struct {
+    llvm::StringRef Test;
+    llvm::StringRef ExpectedHeader;
+    llvm::StringRef ExpectedSource;
+  } Cases[] = {
+      {R"cpp(
+          #define BODY { return; }
+          void f^oo()BODY)cpp",
+       R"cpp(
+          #define BODY { return; }
+          void foo();)cpp",
+       "void foo()BODY"},
+
+      {R"cpp(
+          #define BODY return;
+          void f^oo(){BODY})cpp",
+       R"cpp(
+          #define BODY return;
+          void foo();)cpp",
+       "void foo(){BODY}"},
+
+      {R"cpp(
+          #define TARGET void foo()
+          [[TARGET]]{ return; })cpp",
+       R"cpp(
+          #define TARGET void foo()
+          TARGET;)cpp",
+       "TARGET{ return; }"},
+
+      {R"cpp(
+          #define TARGET foo
+          void [[TARGET]](){ return; })cpp",
+       R"cpp(
+          #define TARGET foo
+          void TARGET();)cpp",
+       "void TARGET(){ return; }"},
+  };
+  for (const auto &Case : Cases) {
+    SCOPED_TRACE(Case.Test);
+    EXPECT_EQ(apply(Case.Test, &EditedFiles), Case.ExpectedHeader);
+    EXPECT_THAT(EditedFiles, testing::ElementsAre(FileWithContents(
+                                 testPath("Test.cpp"), Case.ExpectedSource)));
+  }
+}
+
+TEST_F(DefineOutlineTest, QualifyReturnValue) {
+  FileName = "Test.hpp";
+  ExtraFiles["Test.cpp"] = "";
+
+  struct {
+    llvm::StringRef Test;
+    llvm::StringRef ExpectedHeader;
+    llvm::StringRef ExpectedSource;
+  } Cases[] = {
+      {R"cpp(
+          namespace a { class Foo; }
+          using namespace a;
+          Foo fo^o() { return; })cpp",
+       R"cpp(
+          namespace a { class Foo; }
+          using namespace a;
+          Foo foo() ;)cpp",
+       "a::Foo foo() { return; }"},
+      {R"cpp(
+          namespace a {
+            class Foo {
+              class Bar {};
+              Bar fo^o() { return {}; }
+            };
+          })cpp",
+       R"cpp(
+          namespace a {
+            class Foo {
+              class Bar {};
+              Bar foo() ;
+            };
+          })cpp",
+       "a::Foo::Bar a::Foo::foo() { return {}; }\n"},
+      {R"cpp(
+          class Foo;
+          Foo fo^o() { return; })cpp",
+       R"cpp(
+          class Foo;
+          Foo foo() ;)cpp",
+       "Foo foo() { return; }"},
+  };
+  llvm::StringMap<std::string> EditedFiles;
+  for (auto &Case : Cases) {
+    apply(Case.Test, &EditedFiles);
+    EXPECT_EQ(apply(Case.Test, &EditedFiles), Case.ExpectedHeader);
+    EXPECT_THAT(EditedFiles, testing::ElementsAre(FileWithContents(
+                                 testPath("Test.cpp"), Case.ExpectedSource)));
+  }
+}
+
+TEST_F(DefineOutlineTest, QualifyFunctionName) {
+  FileName = "Test.hpp";
+  struct {
+    llvm::StringRef TestHeader;
+    llvm::StringRef TestSource;
+    llvm::StringRef ExpectedHeader;
+    llvm::StringRef ExpectedSource;
+  } Cases[] = {
+      {
+          R"cpp(
+            namespace a {
+              namespace b {
+                class Foo {
+                  void fo^o() {}
+                };
+              }
+            })cpp",
+          "",
+          R"cpp(
+            namespace a {
+              namespace b {
+                class Foo {
+                  void foo() ;
+                };
+              }
+            })cpp",
+          "void a::b::Foo::foo() {}\n",
+      },
+      {
+          "namespace a { namespace b { void f^oo() {} } }",
+          "namespace a{}",
+          "namespace a { namespace b { void foo() ; } }",
+          "namespace a{void b::foo() {} }",
+      },
+      {
+          "namespace a { namespace b { void f^oo() {} } }",
+          "using namespace a;",
+          "namespace a { namespace b { void foo() ; } }",
+          // FIXME: Take using namespace directives in the source file into
+          // account. This can be spelled as b::foo instead.
+          "using namespace a;void a::b::foo() {} ",
+      },
+  };
+  llvm::StringMap<std::string> EditedFiles;
+  for (auto &Case : Cases) {
+    ExtraFiles["Test.cpp"] = Case.TestSource;
+    EXPECT_EQ(apply(Case.TestHeader, &EditedFiles), Case.ExpectedHeader);
+    EXPECT_THAT(EditedFiles, testing::ElementsAre(FileWithContents(
+                                 testPath("Test.cpp"), Case.ExpectedSource)))
+        << Case.TestHeader;
+  }
+}
 } // namespace
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index b96feecdf3d61..91a196deb6f41 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -196,6 +196,14 @@ Improvements to clang-tidy
   ` check now supports a `StringNames` option enabling its application to
   custom string classes.
 
+- Improved :doc:`modernize-avoid-bind
+  <clang-tidy/checks/modernize-avoid-bind>` check.
+
+  The check now supports diagnosing and fixing arbitrary callables instead of
+  only simple free functions. The `PermissiveParameterList` option has also
+  been added to address situations where the existing fix-it logic would
+  sometimes generate code that no longer compiles.
+
 Improvements to include-fixer
 -----------------------------
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-bind.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-bind.rst
index 7ea9beca8e882..82c290e4a21b7 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-bind.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/modernize-avoid-bind.rst
@@ -3,10 +3,15 @@
 modernize-avoid-bind
 ====================
 
-The check finds uses of ``std::bind`` and replaces simple uses with lambdas.
-Lambdas will use value-capture where required.
+The check finds uses of ``std::bind`` and ``boost::bind`` and replaces them
+with lambdas. Lambdas will use value-capture unless reference capture is
+explicitly requested with ``std::ref`` or ``boost::ref``.
 
-Right now it only handles free functions, not member functions.
+It supports arbitrary callables, including member functions, function objects,
+and free functions, and all variations thereof. Anything that you can pass
+to the first argument of ``bind`` should be diagnosable. Currently, the only
+known case where a fix-it is unsupported is when the same placeholder is
+specified multiple times in the parameter list.
 
 Given:
 
@@ -35,3 +40,49 @@
 ``std::bind`` can be hard to read and can result in larger object files and
 binaries due to type information that will not be produced by equivalent
 lambdas.
+
+Options
+-------
+
+.. option:: PermissiveParameterList
+
+  If the option is set to non-zero, the check will append ``auto&&...`` to the
+  end of every placeholder parameter list. Without this, it is possible for a
+  fix-it to perform an incorrect transformation in the case where the result
+  of the ``bind`` is used in the context of a type-erased functor such as
+  ``std::function``, which allows mismatched arguments. For example:
+
+
+ int add(int x, int y) { return x + y; }
+ int foo() {
+ std::function ignore_args = [] { return add(2, 2); }
+ return ignore_args(3, 3);
+ }
+
+which will *not* compile, since the lambda does not contain an ``operator()``
+that accepts 2 arguments. With permissive parameter list, it instead generates
+
+.. code-block:: c++
+
+ int add(int x, int y) { return x + y; }
+ int foo() {
+ std::function ignore_args = [](auto&&...) { return add(2, 2); }
+ return ignore_args(3, 3);
+ }
+
+which is correct.
+
+This check requires C++14 or higher to run.
diff --git a/clang-tools-extra/test/clang-change-namespace/macro.cpp b/clang-tools-extra/test/clang-change-namespace/macro.cpp
index ba47de603da81..40c4caf058993 100644
--- a/clang-tools-extra/test/clang-change-namespace/macro.cpp
+++ b/clang-tools-extra/test/clang-change-namespace/macro.cpp
@@ -1,7 +1,7 @@
 // RUN: cp %S/macro.cpp %T/macro.cpp
 // RUN: echo "#define USING using na::nc::X" > %T/macro.h
 //
-// RUN: clang-change-namespace -old_namespace "na::nb" -new_namespace "x::y" --file_pattern "macro.cpp" --i %T/macro.cpp --
+// RUN: clang-change-namespace -old_namespace "na::nb" -new_namespace "x::y" --file_pattern "macro.cpp$" --i %T/macro.cpp --
 // RUN: FileCheck -input-file=%T/macro.cpp -check-prefix=CHECK-CC %s
 // RUN: FileCheck -input-file=%T/macro.h -check-prefix=CHECK-HEADER %s
 //
diff --git a/clang-tools-extra/test/clang-tidy/bugprone-suspicious-semicolon-constexpr.cpp b/clang-tools-extra/test/clang-tidy/bugprone-suspicious-semicolon-constexpr.cpp
new file mode 100644
index 0000000000000..c18dd7bd1e932
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/bugprone-suspicious-semicolon-constexpr.cpp
@@ -0,0 +1,31 @@
+// RUN: %check_clang_tidy %s bugprone-suspicious-semicolon %t -- -- -std=c++17
+
+void fail()
+{
+ int x = 0;
+ if(x > 5); (void)x;
+ // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: potentially unintended semicolon [bugprone-suspicious-semicolon]
+ // CHECK-FIXES: if(x > 5) (void)x;
+}
+
+template
+int foo(int a) {
+ if constexpr(X > 0) {
+ return a;
+ }
+ return a + 1;
+}
+
+template
+int foo2(int a) {
+ // FIXME: diagnose the case below. See https://reviews.llvm.org/D46234
+ // for details.
+ if constexpr(X > 0);
+ return a;
+ return a + 1;
+}
+
+int main(void) {
+ foo2<0>(1);
+ return foo<0>(1);
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-macro-parentheses.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-macro-parentheses.cpp
index 2cc45e83b2037..8d128352e7894 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-macro-parentheses.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-macro-parentheses.cpp
@@ -43,6 +43,7 @@
 #define GOOD30(args...) std::cout << args;
 #define GOOD31(X) A*X=2
 #define GOOD32(X) std::vector
+#define GOOD33(x) if (!a__##x) a_##x = &f(#x)
 // These are allowed for now..
#define MAYBE1 *12.34 diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-string-integer-assignment.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-string-integer-assignment.cpp index 18fe5ef4e5c2c..2c288e0bbddf9 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-string-integer-assignment.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-string-integer-assignment.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy %s bugprone-string-integer-assignment %t +// RUN: %check_clang_tidy %s bugprone-string-integer-assignment %t -- -- -fno-delayed-template-parsing namespace std { template @@ -103,6 +103,8 @@ struct S { static constexpr T t = 0x8000; std::string s; void f(char c) { s += c | static_cast(t); } + // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: an integer is interpreted as a chara + // CHECK-FIXES: {{^}} void f(char c) { s += std::to_string(c | static_cast(t)); } }; template S; diff --git a/clang-tools-extra/test/clang-tidy/checkers/google-readability-namespace-comments.cpp b/clang-tools-extra/test/clang-tidy/checkers/google-readability-namespace-comments.cpp index b4e79c97c0056..591c9dae5a74e 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/google-readability-namespace-comments.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/google-readability-namespace-comments.cpp @@ -25,10 +25,10 @@ void f(); // So that the namespace isn't empty. // 5 // 6 // 7 -// CHECK-MESSAGES: :[[@LINE+2]]:1: warning: namespace 'MACRO' not terminated with -// CHECK-MESSAGES: :[[@LINE-10]]:11: note: namespace 'MACRO' starts here +// CHECK-MESSAGES: :[[@LINE+2]]:1: warning: namespace 'macro_expansion' not terminated with +// CHECK-MESSAGES: :[[@LINE-10]]:11: note: namespace 'macro_expansion' starts here } -// CHECK-FIXES: } // namespace MACRO +// CHECK-FIXES: } // namespace macro_expansion namespace short1 { namespace short2 { diff --git a/clang-tools-extra/test/clang-tidy/checkers/llvm-namespace-comment.cpp b/clang-tools-extra/test/clang-tidy/checkers/llvm-namespace-comment.cpp deleted file mode 100644 index a7d315693421d..0000000000000 --- a/clang-tools-extra/test/clang-tidy/checkers/llvm-namespace-comment.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// RUN: %check_clang_tidy %s llvm-namespace-comment %t - -namespace n1 { -namespace n2 { - void f(); - - - // CHECK-MESSAGES: :[[@LINE+2]]:1: warning: namespace 'n2' not terminated with a closing comment [llvm-namespace-comment] - // CHECK-MESSAGES: :[[@LINE+1]]:2: warning: namespace 'n1' not terminated with a closing comment [llvm-namespace-comment] -}} -// CHECK-FIXES: } // namespace n2 -// CHECK-FIXES: } // namespace n1 - -#define MACRO macro_expansion -namespace MACRO { - void f(); - // CHECK-MESSAGES: :[[@LINE+1]]:1: warning: namespace 'MACRO' not terminated with a closing comment [llvm-namespace-comment] -} -// CHECK-FIXES: } // namespace MACRO - -namespace MACRO { - void g(); -} // namespace MACRO - -namespace MACRO { - void h(); - // CHECK-MESSAGES: :[[@LINE+1]]:2: warning: namespace 'MACRO' ends with a comment that refers to an expansion of macro [llvm-namespace-comment] -} // namespace macro_expansion -// CHECK-FIXES: } // namespace MACRO - -namespace n1 { -namespace MACRO { -namespace n2 { - void f(); - // CHECK-MESSAGES: :[[@LINE+3]]:1: warning: namespace 'n2' not terminated with a closing comment [llvm-namespace-comment] - // CHECK-MESSAGES: :[[@LINE+2]]:2: warning: namespace 'MACRO' not terminated with a closing comment [llvm-namespace-comment] - // CHECK-MESSAGES: :[[@LINE+1]]:3: warning: namespace 'n1' not 
terminated with a closing comment [llvm-namespace-comment] -}}} -// CHECK-FIXES: } // namespace n2 -// CHECK-FIXES: } // namespace MACRO -// CHECK-FIXES: } // namespace n1 diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.cpp index 119eff67318ea..8e546b44ab74d 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.cpp @@ -233,7 +233,7 @@ struct a { template class d { a e; - void f() { e.b(); } + void f() { e.b(0); } }; } // namespace } // namespace PR38055 diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize-avoid-bind-permissive-parameter-list.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize-avoid-bind-permissive-parameter-list.cpp new file mode 100644 index 0000000000000..6c81a6e9ab97d --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize-avoid-bind-permissive-parameter-list.cpp @@ -0,0 +1,58 @@ +// RUN: %check_clang_tidy -std=c++14-or-later %s modernize-avoid-bind %t -- \ +// RUN: -config="{CheckOptions: [ \ +// RUN: {key: modernize-avoid-bind.PermissiveParameterList, value: 1}]}" -- + +namespace std { +inline namespace impl { +template +class bind_rt {}; + +template +bind_rt bind(Fp &&, Arguments &&...); +} // namespace impl + +template +T ref(T &t); +} // namespace std + +int add(int x, int y) { return x + y; } + +// Let's fake a minimal std::function-like facility. +namespace std { +template +_Tp declval(); + +template +struct __res { + template + static decltype(declval<_Functor>()(_Args()...)) _S_test(int); + + template + static void _S_test(...); + + using type = decltype(_S_test<_ArgTypes...>(0)); +}; + +template +struct function; + +template +struct function { + template ::type> + function(_Functor) {} +}; +} // namespace std + +struct placeholder {}; +placeholder _1; + +void testLiteralParameters() { + auto AAA = std::bind(add, 2, 2); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind [modernize-avoid-bind] + // CHECK-FIXES: auto AAA = [](auto && ...) { return add(2, 2); }; + + auto BBB = std::bind(add, _1, 2); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind [modernize-avoid-bind] + // CHECK-FIXES: auto BBB = [](auto && PH1, auto && ...) 
{ return add(PH1, 2); }; +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize-avoid-bind.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize-avoid-bind.cpp index fa60cdc2c9d08..7e00858c1acce 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize-avoid-bind.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize-avoid-bind.cpp @@ -8,75 +8,62 @@ class bind_rt {}; template bind_rt bind(Fp &&, Arguments &&...); } + +template +T ref(T &t); } -int add(int x, int y) { return x + y; } +namespace boost { +template +class bind_rt {}; -void f() { - auto clj = std::bind(add, 2, 2); - // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind [modernize-avoid-bind] - // CHECK-FIXES: auto clj = [] { return add(2, 2); }; -} +template +bind_rt bind(const Fp &, Arguments...); -void g() { - int x = 2; - int y = 2; - auto clj = std::bind(add, x, y); - // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind - // CHECK-FIXES: auto clj = [=] { return add(x, y); }; +template +struct reference_wrapper { + explicit reference_wrapper(T &t) {} +}; + +template +reference_wrapper const ref(T &t) { + return reference_wrapper(t); } -struct placeholder {}; -placeholder _1; -placeholder _2; +} // namespace boost -void h() { - int x = 2; - auto clj = std::bind(add, x, _1); - // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind - // CHECK-FIXES: auto clj = [=](auto && arg1) { return add(x, arg1); }; -} +namespace C { +int add(int x, int y) { return x + y; } +} // namespace C -struct A; -struct B; -bool ABTest(const A &, const B &); +struct Foo { + static int add(int x, int y) { return x + y; } +}; -void i() { - auto BATest = std::bind(ABTest, _2, _1); - // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: prefer a lambda to std::bind - // CHECK-FIXES: auto BATest = [](auto && arg1, auto && arg2) { return ABTest(arg2, arg1); }; -} +struct D { + D() = default; + void operator()(int x, int y) const {} -void j() { - auto clj = std::bind(add, 2, 2, 2); - // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind - // No fix is applied for argument mismatches. - // CHECK-FIXES: auto clj = std::bind(add, 2, 2, 2); -} + void MemberFunction(int x) {} -void k() { - auto clj = std::bind(add, _1, _1); - // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind - // No fix is applied for reused placeholders. - // CHECK-FIXES: auto clj = std::bind(add, _1, _1); -} + static D *create(); +}; -void m() { - auto clj = std::bind(add, 1, add(2, 5)); - // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind - // No fix is applied for nested calls. - // CHECK-FIXES: auto clj = std::bind(add, 1, add(2, 5)); -} +struct F { + F(int x) {} + ~F() {} -namespace C { - int add(int x, int y){ return x + y; } -} + int get() { return 42; } +}; -void n() { - auto clj = std::bind(C::add, 1, 1); - // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind - // CHECK-FIXES: auto clj = [] { return C::add(1, 1); }; -} +void UseF(F); + +struct placeholder {}; +placeholder _1; +placeholder _2; + +int add(int x, int y) { return x + y; } +int addThree(int x, int y, int z) { return x + y + z; } // Let's fake a minimal std::function-like facility. 
namespace std { @@ -114,10 +101,213 @@ struct Callback { void Reset(std::function); }; -void test(Thing *t) { +int GlobalVariable = 42; + +struct TestCaptureByValueStruct { + int MemberVariable; + static int StaticMemberVariable; + F MemberStruct; + + void testCaptureByValue(int Param, F f) { + int x = 3; + int y = 4; + auto AAA = std::bind(add, x, y); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer a lambda to std::bind [modernize-avoid-bind] + // CHECK-FIXES: auto AAA = [x, y] { return add(x, y); }; + + // When the captured variable is repeated, it should only appear in the capture list once. + auto BBB = std::bind(add, x, x); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer a lambda to std::bind [modernize-avoid-bind] + // CHECK-FIXES: auto BBB = [x] { return add(x, x); }; + + int LocalVariable; + // Global variables shouldn't be captured at all, and members should be captured through this. + auto CCC = std::bind(add, MemberVariable, GlobalVariable); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer a lambda to std::bind [modernize-avoid-bind] + // CHECK-FIXES: auto CCC = [this] { return add(MemberVariable, GlobalVariable); }; + + // Static member variables shouldn't be captured, but locals should + auto DDD = std::bind(add, TestCaptureByValueStruct::StaticMemberVariable, LocalVariable); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer a lambda to std::bind [modernize-avoid-bind] + // CHECK-FIXES: auto DDD = [LocalVariable] { return add(TestCaptureByValueStruct::StaticMemberVariable, LocalVariable); }; + + auto EEE = std::bind(add, Param, Param); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer a lambda to std::bind [modernize-avoid-bind] + // CHECK-FIXES: auto EEE = [Param] { return add(Param, Param); }; + + // The signature of boost::bind() is different, and causes + // CXXBindTemporaryExprs to be created in certain cases. So let's test + // those here. + auto FFF = boost::bind(UseF, f); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer a lambda to boost::bind [modernize-avoid-bind] + // CHECK-FIXES: auto FFF = [f] { return UseF(f); }; + + auto GGG = boost::bind(UseF, MemberStruct); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer a lambda to boost::bind [modernize-avoid-bind] + // CHECK-FIXES: auto GGG = [this] { return UseF(MemberStruct); }; + } +}; + +void testLiteralParameters() { + auto AAA = std::bind(add, 2, 2); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind [modernize-avoid-bind] + // CHECK-FIXES: auto AAA = [] { return add(2, 2); }; + + auto BBB = std::bind(addThree, 2, 3, 4); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind [modernize-avoid-bind] + // CHECK-FIXES: auto BBB = [] { return addThree(2, 3, 4); }; +} + +void testCaptureByReference() { + int x = 2; + int y = 2; + auto AAA = std::bind(add, std::ref(x), std::ref(y)); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto AAA = [&x, &y] { return add(x, y); }; + + auto BBB = std::bind(add, std::ref(x), y); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto BBB = [&x, y] { return add(x, y); }; + + auto CCC = std::bind(add, y, std::ref(x)); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto CCC = [y, &x] { return add(y, x); }; + + // Make sure it works with boost::ref() too which has slightly different + // semantics. 
+ auto DDD = boost::bind(add, boost::ref(x), boost::ref(y)); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to boost::bind + // CHECK-FIXES: auto DDD = [&x, &y] { return add(x, y); }; + + auto EEE = boost::bind(add, boost::ref(x), y); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to boost::bind + // CHECK-FIXES: auto EEE = [&x, y] { return add(x, y); }; + + auto FFF = boost::bind(add, y, boost::ref(x)); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to boost::bind + // CHECK-FIXES: auto FFF = [y, &x] { return add(y, x); }; +} + +void testCaptureByInitExpression() { + int x = 42; + auto AAA = std::bind(add, x, F(x).get()); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto AAA = [x, capture0 = F(x).get()] { return add(x, capture0); }; +} + +void testFunctionObjects() { + D d; + D *e = nullptr; + auto AAA = std::bind(d, 1, 2); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto AAA = [d] { return d(1, 2); } + + auto BBB = std::bind(*e, 1, 2); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto BBB = [e] { return (*e)(1, 2); } + + auto CCC = std::bind(D{}, 1, 2); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto CCC = [] { return D{}(1, 2); } + + auto DDD = std::bind(D(), 1, 2); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto DDD = [] { return D()(1, 2); } + + auto EEE = std::bind(*D::create(), 1, 2); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto EEE = [Func = *D::create()] { return Func(1, 2); }; +} + +void testPlaceholders() { + int x = 2; + auto AAA = std::bind(add, x, _1); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto AAA = [x](auto && PH1) { return add(x, PH1); }; + + auto BBB = std::bind(add, _2, _1); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto BBB = [](auto && PH1, auto && PH2) { return add(PH2, PH1); }; + + // No fix is applied for reused placeholders. + auto CCC = std::bind(add, _1, _1); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto CCC = std::bind(add, _1, _1); + + // When a placeholder is skipped, we always add skipped ones to the lambda as + // unnamed parameters. + auto DDD = std::bind(add, _2, 1); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto DDD = [](auto &&, auto && PH2) { return add(PH2, 1); }; +} + +void testGlobalFunctions() { + auto AAA = std::bind(C::add, 1, 1); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto AAA = [] { return C::add(1, 1); }; + + auto BBB = std::bind(Foo::add, 1, 1); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto BBB = [] { return Foo::add(1, 1); }; + + // The & should get removed inside of the lambda body. 
+ auto CCC = std::bind(&C::add, 1, 1); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto CCC = [] { return C::add(1, 1); }; + + auto DDD = std::bind(&Foo::add, 1, 1); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto DDD = [] { return Foo::add(1, 1); }; + + auto EEE = std::bind(&add, 1, 1); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto EEE = [] { return add(1, 1); }; +} + +void testCapturedSubexpressions() { + int x = 3; + int y = 3; + + auto AAA = std::bind(add, 1, add(2, 5)); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // Results of nested calls are captured by value. + // CHECK-FIXES: auto AAA = [capture0 = add(2, 5)] { return add(1, capture0); }; + + auto BBB = std::bind(add, x, add(y, 5)); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind + // Results of nested calls are captured by value. + // CHECK-FIXES: auto BBB = [x, capture0 = add(y, 5)] { return add(x, capture0); }; +} + +struct E { + void MemberFunction(int x) {} + + void testMemberFunctions() { + D *d; + D dd; + auto AAA = std::bind(&D::MemberFunction, d, 1); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto AAA = [d] { d->MemberFunction(1); }; + + auto BBB = std::bind(&D::MemberFunction, &dd, 1); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto BBB = [ObjectPtr = &dd] { ObjectPtr->MemberFunction(1); }; + + auto CCC = std::bind(&E::MemberFunction, this, 1); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto CCC = [this] { MemberFunction(1); }; + + // Test what happens when the object pointer is itself a placeholder. + auto DDD = std::bind(&D::MemberFunction, _1, 1); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer a lambda to std::bind + // CHECK-FIXES: auto DDD = [](auto && PH1) { PH1->MemberFunction(1); }; + } +}; + +void testStdFunction(Thing *t) { Callback cb; if (t) cb.Reset(std::bind(UseThing, t)); // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: prefer a lambda to std::bind - // CHECK-FIXES: cb.Reset([=] { return UseThing(t); }); + // CHECK-FIXES: cb.Reset([t] { return UseThing(t); }); } diff --git a/clang/bindings/python/tests/CMakeLists.txt b/clang/bindings/python/tests/CMakeLists.txt index 3f5ac957f81d4..626256af9c1b6 100644 --- a/clang/bindings/python/tests/CMakeLists.txt +++ b/clang/bindings/python/tests/CMakeLists.txt @@ -32,6 +32,11 @@ if(WIN32) set(RUN_PYTHON_TESTS FALSE) endif() +# The Python FFI interface is broken on AIX: https://bugs.python.org/issue38628. +if(${CMAKE_SYSTEM_NAME} MATCHES "AIX") + set(RUN_PYTHON_TESTS FALSE) +endif() + # AArch64, Hexagon, and Sparc have known test failures that need to be # addressed. # SystemZ has broken Python/FFI interface: diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index 492eec71f2e4e..e8d561fae9564 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -2430,31 +2430,10 @@ Enable XNACK (AMDGPU only) ARM --- - -.. option:: -ffixed-r6 - -Reserve the r6 register (ARM only) - -.. option:: -ffixed-r7 - -Reserve the r7 register (ARM only) - -.. option:: -ffixed-r8 - -Reserve the r8 register (ARM only) - .. option:: -ffixed-r9 Reserve the r9 register (ARM only) -.. 
option:: -ffixed-r10 - -Reserve the r10 register (ARM only) - -.. option:: -ffixed-r11 - -Reserve the r11 register (ARM only) - .. option:: -mexecute-only, -mno-execute-only, -mpure-code Disallow generation of data access to code sections (ARM only) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index f438ec7f871b7..2f7483435fd4f 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -2328,6 +2328,9 @@ the configuration (without a prefix: ``Auto``). true: false: x = ( int32 )y vs. x = (int32)y +**SpacesInConditionalStatement** (``bool``) + If ``true``, spaces will be inserted around if/for/while (and similar) conditions. + **SpacesInContainerLiterals** (``bool``) If ``true``, spaces are inserted inside container literals (e.g. ObjC and Javascript array and dict literals). diff --git a/clang/docs/ConstantInterpreter.rst b/clang/docs/ConstantInterpreter.rst index d4fb8f6f34aa8..a86161c8fa011 100644 --- a/clang/docs/ConstantInterpreter.rst +++ b/clang/docs/ConstantInterpreter.rst @@ -10,8 +10,7 @@ Introduction The constexpr interpreter aims to replace the existing tree evaluator in clang, improving performance on constructs which are executed inefficiently by the evaluator. The interpreter is activated using the following flags: -* ``-fexperimental-new-constant-interpreter`` enables the interpreter, falling back to the evaluator for unsupported features -* ``-fforce-experimental-new-constant-interpreter`` forces the use of the interpreter, bailing out if an unsupported feature is encountered +* ``-fexperimental-new-constant-interpreter`` enables the interpreter, emitting an error if an unsupported feature is encountered Bytecode Compilation ==================== diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index ee80f1afb9e62..6c113fa6b43fe 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -153,7 +153,7 @@ implementation. +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | task extension | mutexinoutset dependence-type for tasks | :good:`done` | D53380,D57576 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task extension | combined taskloop constructs | :none:`unclaimed` | | +| task extension | combined taskloop constructs | :good:`done` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | task extension | master taskloop | :good:`done` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ @@ -173,9 +173,9 @@ implementation. 
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | device extension | OMP_TARGET_OFFLOAD environment variable | :good:`done` | D50522 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device extension | support full 'defaultmap' functionality | :part:`worked on` | D69204 | +| device extension | support full 'defaultmap' functionality | :good:`done` | D69204 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device extension | device specific functions | :none:`unclaimed` | | +| device extension | device specific functions | :good:`done` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | device extension | clause: device_type | :good:`done` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ @@ -191,9 +191,9 @@ implementation. +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | device extension | allow access to the reference count (omp_target_is_present) | :part:`worked on` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device extension | requires directive (unified shared memory) | :part:`worked on` | | +| device extension | requires directive (unified shared memory) | :good:`done` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device extension | clause: unified_address, unified_shared_memory | :part:`worked on` | D52625,D52359 | +| device extension | clause: unified_address, unified_shared_memory | :good:`done` | D52625,D52359 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | device extension | clause: reverse_offload | :none:`unclaimed parts` | D52780 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 4ac300deb589a..37a8f30e0bc9c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -78,6 +78,10 @@ Non-comprehensive list of changes in this release been extended to detect these cases, so that code relying on them can be detected and fixed. +* The Implicit Conversion Sanitizer (``-fsanitize=implicit-conversion``) has + learned to sanitize pre/post increment/decrement of types with bit width + smaller than ``int``. 
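For reference, a minimal sketch of a pattern the sanitizer now reports (the file name ``demo.cpp`` and the wording of the comments are illustrative assumptions, not taken from the release note):

.. code-block:: c++

  // Build with: clang++ -fsanitize=implicit-conversion demo.cpp
  int main() {
    unsigned char c = 255;
    // The post-increment promotes 'c' to 'int', adds 1 (yielding 256), then
    // truncates the result back to 'unsigned char'. That lossy implicit
    // conversion (256 becomes 0) is what the sanitizer now reports at run time.
    c++;
    return c;
  }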
+
 - For X86 target, -march=skylake-avx512, -march=icelake-client, -march=icelake-server, -march=cascadelake, -march=cooperlake will default to not using 512-bit zmm registers in vectorized code unless 512-bit intrinsics
diff --git a/clang/docs/SourceBasedCodeCoverage.rst b/clang/docs/SourceBasedCodeCoverage.rst
index 73197a57713f9..7e711819be34a 100644
--- a/clang/docs/SourceBasedCodeCoverage.rst
+++ b/clang/docs/SourceBasedCodeCoverage.rst
@@ -302,3 +302,37 @@ Drawbacks and limitations
 If the call to ``may_throw()`` propagates an exception into ``f``, the code coverage tool may mark the ``return`` statement as executed even though it is not. A call to ``longjmp()`` can have similar effects.
+
+Clang implementation details
+============================
+
+This section may be of interest to those wishing to understand or improve
+the clang code coverage implementation.
+
+Gap regions
+-----------
+
+Gap regions are source regions with counts. A reporting tool cannot set a line
+execution count to the count from a gap region unless that region is the only
+one on a line.
+
+Gap regions are used to eliminate unnatural artifacts in coverage reports, such
+as red "unexecuted" highlights present at the end of an otherwise covered line,
+or blue "executed" highlights present at the start of a line that is otherwise
+not executed.
+
+Switch statements
+-----------------
+
+The region mapping for a switch body consists of a gap region that covers the
+entire body (starting from the '{' in 'switch (...) {', and terminating where the
+last case ends). This gap region has a zero count: this causes "gap" areas in
+between case statements, which contain no executable code, to appear uncovered.
+
+When a switch case is visited, the parent region is extended: if the parent
+region has no start location, its start location becomes the start of the case.
+This is used to support switch statements without a ``CompoundStmt`` body, in
+which the switch body and the single case share a count.
+
+For switches with ``CompoundStmt`` bodies, a new region is created at the start
+of each switch case.
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 714681d7f4cea..62e2575c6b26e 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -1231,10 +1231,10 @@ are listed below.
 **-f[no-]trapping-math**
- ``-fno-trapping-math`` allows optimizations that assume that
- floating point operations cannot generate traps such as divide-by-zero,
- overflow and underflow. Defaults to ``-ftrapping-math``.
- Currently this option has no effect.
+ Control floating point exception behavior. ``-fno-trapping-math`` allows optimizations that assume that floating point operations cannot generate traps such as divide-by-zero, overflow and underflow.
+
+- The option ``-ftrapping-math`` behaves identically to ``-ffp-exception-behavior=strict``.
+- The option ``-fno-trapping-math`` behaves identically to ``-ffp-exception-behavior=ignore``. This is the default.
 .. option:: -ffp-contract=
@@ -1319,6 +1319,52 @@ are listed below.
 Defaults to ``-fno-finite-math``.
+.. _opt_frounding-math:
+
+**-f[no-]rounding-math**
+
+Force floating-point operations to honor the dynamically-set rounding mode by default.
+
+The result of a floating-point operation often cannot be exactly represented in the result type and therefore must be rounded. IEEE 754 describes different rounding modes that control how to perform this rounding, not all of which are supported by all implementations. C provides interfaces (``fesetround`` and ``fesetenv``) for dynamically controlling the rounding mode, and while it also recommends certain conventions for changing the rounding mode, these conventions are not typically enforced in the ABI. Since the rounding mode changes the numerical result of operations, the compiler must understand something about it in order to optimize floating point operations.
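A minimal sketch of the difference ``-frounding-math`` makes, assuming a hosted environment with ``<cfenv>`` (the file name and the behavior noted in the comments are expectations based on the description above, not guarantees):

.. code-block:: c++

  // round.cpp; build with: clang++ -frounding-math round.cpp
  #include <cfenv>
  #include <cstdio>

  int main() {
    std::fesetround(FE_UPWARD);
    double up = 1.0 / 3.0;
    std::fesetround(FE_DOWNWARD);
    double down = 1.0 / 3.0;
    // Under -frounding-math the divisions honor the dynamically-set modes,
    // so 'up' and 'down' differ in the last bit. Under -fno-rounding-math
    // both may be constant-folded using FE_TONEAREST and compare equal.
    std::printf("%d\n", up != down);
    return 0;
  }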
+
+Note that floating-point operations performed as part of constant initialization are formally performed prior to the start of the program and are therefore not subject to the current rounding mode. This includes the initialization of global variables and local ``static`` variables. Floating-point operations in these contexts will be rounded using ``FE_TONEAREST``.
+
+- The option ``-fno-rounding-math`` allows the compiler to assume that the rounding mode is set to ``FE_TONEAREST``. This is the default.
+- The option ``-frounding-math`` forces the compiler to honor the dynamically-set rounding mode. This prevents optimizations which might affect results if the rounding mode changes or is different from the default; for example, it prevents floating-point operations from being reordered across most calls and prevents constant-folding when the result is not exactly representable.
+
+.. option:: -ffp-model=
+
+ Specify floating point behavior. ``-ffp-model`` is an umbrella
+ option that encompasses functionality provided by other, single
+ purpose, floating point options. Valid values are: ``precise``, ``strict``,
+ and ``fast``.
+ Details:
+
+ * ``precise`` Disables optimizations that are not value-safe on floating-point data, although FP contraction (FMA) is enabled (``-ffp-contract=fast``). This is the default behavior.
+ * ``strict`` Enables ``-frounding-math`` and ``-ffp-exception-behavior=strict``, and disables contractions (FMA). All of the ``-ffast-math`` enablements are disabled.
+ * ``fast`` Behaves identically to specifying both ``-ffast-math`` and ``-ffp-contract=fast``.
+
+ Note: If the command line specifies multiple instances of the ``-ffp-model``
+ option, or specifies ``-ffp-model`` together with a later floating point
+ option that negates part of the selected model, the compiler will issue a
+ diagnostic warning that the override has occurred.
+
+.. option:: -ffp-exception-behavior=
+
+ Specify the floating-point exception behavior.
+
+ Valid values are: ``ignore``, ``maytrap``, and ``strict``.
+ The default value is ``ignore``. Details:
+
+ * ``ignore`` The compiler assumes that the exception status flags will not be read and that floating point exceptions will be masked.
+ * ``maytrap`` The compiler avoids transformations that may raise exceptions that would not have been raised by the original code. Constant folding performed by the compiler is exempt from this option.
+ * ``strict`` The compiler ensures that all transformations strictly preserve the floating point exception semantics of the original code.
+
+
+
 .. _controlling-code-generation:
 Controlling Code Generation
diff --git a/clang/examples/clang-interpreter/main.cpp b/clang/examples/clang-interpreter/main.cpp
index 6ac142bffdffc..db6b0cce4fd17 100644
--- a/clang/examples/clang-interpreter/main.cpp
+++ b/clang/examples/clang-interpreter/main.cpp
@@ -54,6 +54,7 @@ class SimpleJIT {
 std::unique_ptr TM;
 const DataLayout DL;
 MangleAndInterner Mangle{ES, DL};
+ JITDylib &MainJD{ES.createJITDylib("
")}; RTDyldObjectLinkingLayer ObjectLayer{ES, createMemMgr}; IRCompileLayer CompileLayer{ES, ObjectLayer, SimpleCompiler(*TM)}; @@ -66,7 +67,7 @@ class SimpleJIT { std::unique_ptr ProcessSymbolsGenerator) : TM(std::move(TM)), DL(std::move(DL)) { llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); - ES.getMainJITDylib().addGenerator(std::move(ProcessSymbolsGenerator)); + MainJD.addGenerator(std::move(ProcessSymbolsGenerator)); } public: @@ -95,11 +96,11 @@ class SimpleJIT { const TargetMachine &getTargetMachine() const { return *TM; } Error addModule(ThreadSafeModule M) { - return CompileLayer.add(ES.getMainJITDylib(), std::move(M)); + return CompileLayer.add(MainJD, std::move(M)); } Expected findSymbol(const StringRef &Name) { - return ES.lookup({&ES.getMainJITDylib()}, Mangle(Name)); + return ES.lookup({&MainJD}, Mangle(Name)); } Expected getSymbolAddress(const StringRef &Name) { diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 31adfc5c368a6..f4913540bab4d 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -1964,6 +1964,14 @@ class FunctionDecl : public DeclaratorDecl, void setRangeEnd(SourceLocation E) { EndRangeLoc = E; } + /// Returns the location of the ellipsis of a variadic function. + SourceLocation getEllipsisLoc() const { + const auto *FPT = getType()->getAs(); + if (FPT && FPT->isVariadic()) + return FPT->getEllipsisLoc(); + return SourceLocation(); + } + SourceRange getSourceRange() const override LLVM_READONLY; // Function definitions. @@ -2188,6 +2196,10 @@ class FunctionDecl : public DeclaratorDecl, bool usesSEHTry() const { return FunctionDeclBits.UsesSEHTry; } void setUsesSEHTry(bool UST) { FunctionDeclBits.UsesSEHTry = UST; } + /// Indicates the function uses Floating Point constrained intrinsics + bool usesFPIntrin() const { return FunctionDeclBits.UsesFPIntrin; } + void setUsesFPIntrin(bool Val) { FunctionDeclBits.UsesFPIntrin = Val; } + /// Whether this function has been deleted. /// /// A function that is "deleted" (via the C++0x "= delete" syntax) @@ -2388,6 +2400,12 @@ class FunctionDecl : public DeclaratorDecl, /// limited representation in the AST. SourceRange getReturnTypeSourceRange() const; + /// Attempt to compute an informative source range covering the + /// function parameters, including the ellipsis of a variadic function. + /// The source range excludes the parentheses, and is invalid if there are + /// no parameters and no ellipsis. + SourceRange getParametersSourceRange() const; + /// Get the declared return type, which may differ from the actual return /// type if the return type is deduced. QualType getDeclaredReturnType() const { diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index adea10b33188b..54cdb84b6f330 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -1534,10 +1534,13 @@ class DeclContext { /// Store the ODRHash after first calculation. uint64_t HasODRHash : 1; + + /// Indicates if the function uses Floating Point Constrained Intrinsics + uint64_t UsesFPIntrin : 1; }; /// Number of non-inherited bits in FunctionDeclBitfields. - enum { NumFunctionDeclBits = 25 }; + enum { NumFunctionDeclBits = 26 }; /// Stores the bits used by CXXConstructorDecl. 
If modified /// NumCXXConstructorDeclBits and the accessor @@ -1554,7 +1557,7 @@ class DeclContext { /// exactly 64 bits and thus the width of NumCtorInitializers /// will need to be shrunk if some bit is added to NumDeclContextBitfields, /// NumFunctionDeclBitfields or CXXConstructorDeclBitfields. - uint64_t NumCtorInitializers : 23; + uint64_t NumCtorInitializers : 22; uint64_t IsInheritingConstructor : 1; /// Whether this constructor has a trail-allocated explicit specifier. diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 63d67bd3f55b2..0f2018fb9e8cb 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -3041,7 +3041,9 @@ class NamespaceAliasDecl : public NamedDecl, /// Implicit declaration of a temporary that was materialized by /// a MaterializeTemporaryExpr and lifetime-extended by a declaration -class LifetimeExtendedTemporaryDecl final : public Decl { +class LifetimeExtendedTemporaryDecl final + : public Decl, + public Mergeable { friend class MaterializeTemporaryExpr; friend class ASTDeclReader; diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h index 37ab8c084e57c..4023e023e9d56 100644 --- a/clang/include/clang/AST/JSONNodeDumper.h +++ b/clang/include/clang/AST/JSONNodeDumper.h @@ -126,7 +126,7 @@ class JSONNodeDumper ASTNameGenerator ASTNameGen; PrintingPolicy PrintPolicy; const comments::CommandTraits *Traits; - StringRef LastLocFilename; + StringRef LastLocFilename, LastLocPresumedFilename; unsigned LastLocLine, LastLocPresumedLine; using InnerAttrVisitor = ConstAttrVisitor; diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index 0ff5a614a864d..d293ea190aa43 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -346,6 +346,8 @@ class TextNodeDumper void VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D); void VisitBlockDecl(const BlockDecl *D); void VisitConceptDecl(const ConceptDecl *D); + void + VisitLifetimeExtendedTemporaryDecl(const LifetimeExtendedTemporaryDecl *D); }; } // namespace clang diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index c047171730ba7..b15881a682ace 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2077,6 +2077,8 @@ class alignas(8) Type : public ExtQualsTypeCommonBase { bool isAlignValT() const; // C++17 std::align_val_t bool isStdByteType() const; // C++17 std::byte bool isAtomicType() const; // C11 _Atomic() + bool isUndeducedAutoType() const; // C++11 auto or + // C++14 decltype(auto) #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ bool is##Id##Type() const; @@ -3734,9 +3736,9 @@ class FunctionProtoType final : public FunctionType, public llvm::FoldingSetNode, private llvm::TrailingObjects< - FunctionProtoType, QualType, FunctionType::FunctionTypeExtraBitfields, - FunctionType::ExceptionType, Expr *, FunctionDecl *, - FunctionType::ExtParameterInfo, Qualifiers> { + FunctionProtoType, QualType, SourceLocation, + FunctionType::FunctionTypeExtraBitfields, FunctionType::ExceptionType, + Expr *, FunctionDecl *, FunctionType::ExtParameterInfo, Qualifiers> { friend class ASTContext; // ASTContext creates these. friend TrailingObjects; @@ -3747,6 +3749,9 @@ class FunctionProtoType final // Always present. Note that for the vast majority of FunctionProtoType, // these will be the only trailing objects. 
// + // * Optionally if the function is variadic, the SourceLocation of the + // ellipsis. + // // * Optionally if some extra data is stored in FunctionTypeExtraBitfields // (see FunctionTypeExtraBitfields and FunctionTypeBitfields): // a single FunctionTypeExtraBitfields. Present if and only if @@ -3818,6 +3823,7 @@ class FunctionProtoType final RefQualifierKind RefQualifier = RQ_None; ExceptionSpecInfo ExceptionSpec; const ExtParameterInfo *ExtParameterInfos = nullptr; + SourceLocation EllipsisLoc; ExtProtoInfo() : Variadic(false), HasTrailingReturn(false) {} @@ -3836,6 +3842,10 @@ class FunctionProtoType final return getNumParams(); } + unsigned numTrailingObjects(OverloadToken) const { + return isVariadic(); + } + unsigned numTrailingObjects(OverloadToken) const { return hasExtraBitfields(); } @@ -3947,6 +3957,7 @@ class FunctionProtoType final ExtProtoInfo EPI; EPI.ExtInfo = getExtInfo(); EPI.Variadic = isVariadic(); + EPI.EllipsisLoc = getEllipsisLoc(); EPI.HasTrailingReturn = hasTrailingReturn(); EPI.ExceptionSpec.Type = getExceptionSpecType(); EPI.TypeQuals = getMethodQuals(); @@ -4048,6 +4059,11 @@ class FunctionProtoType final /// Whether this function prototype is variadic. bool isVariadic() const { return FunctionTypeBits.Variadic; } + SourceLocation getEllipsisLoc() const { + return isVariadic() ? *getTrailingObjects() + : SourceLocation(); + } + /// Determines whether this function prototype contains a /// parameter pack at the end. /// @@ -6517,6 +6533,10 @@ inline bool Type::isAtomicType() const { return isa(CanonicalType); } +inline bool Type::isUndeducedAutoType() const { + return isa(CanonicalType); +} + inline bool Type::isObjCQualifiedIdType() const { if (const auto *OPT = getAs()) return OPT->isObjCQualifiedIdType(); diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h index f305680d775cf..7f1d429ac3b42 100644 --- a/clang/include/clang/AST/TypeLoc.h +++ b/clang/include/clang/AST/TypeLoc.h @@ -173,9 +173,6 @@ class TypeLoc { TypeLoc IgnoreParens() const; - /// Strips MacroDefinitionTypeLocs from a type location. - TypeLoc IgnoreMacroDefinitions() const; - /// Find a type with the location of an explicit type qualifier. 
/// /// The result, if non-null, will be one of: diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 298f688f8c0ad..72564720b7db2 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1107,6 +1107,7 @@ def SYCLDeviceIndirectlyCallable : InheritableAttr { let LangOpts = [SYCLIsDevice]; let Documentation = [SYCLDeviceIndirectlyCallableDocs]; } + def SYCLIntelKernelArgsRestrict : InheritableAttr { let Spellings = [ CXX11<"intel", "kernel_args_restrict"> ]; let Subjects = SubjectList<[Function], ErrorDiag>; @@ -1193,27 +1194,27 @@ def OpenCLAccess : Attr { } def OpenCLPrivateAddressSpace : TypeAttr { - let Spellings = [Keyword<"__private">, Keyword<"private">, Clang<"ocl_private">]; + let Spellings = [Keyword<"__private">, Keyword<"private">, Clang<"opencl_private">]; let Documentation = [OpenCLAddressSpacePrivateDocs]; } def OpenCLGlobalAddressSpace : TypeAttr { - let Spellings = [Keyword<"__global">, Keyword<"global">, Clang<"ocl_global">]; + let Spellings = [Keyword<"__global">, Keyword<"global">, Clang<"opencl_global">]; let Documentation = [OpenCLAddressSpaceGlobalDocs]; } def OpenCLLocalAddressSpace : TypeAttr { - let Spellings = [Keyword<"__local">, Keyword<"local">, Clang<"ocl_local">]; + let Spellings = [Keyword<"__local">, Keyword<"local">, Clang<"opencl_local">]; let Documentation = [OpenCLAddressSpaceLocalDocs]; } def OpenCLConstantAddressSpace : TypeAttr { - let Spellings = [Keyword<"__constant">, Keyword<"constant">, Clang<"ocl_constant">]; + let Spellings = [Keyword<"__constant">, Keyword<"constant">, Clang<"opencl_constant">]; let Documentation = [OpenCLAddressSpaceConstantDocs]; } def OpenCLGenericAddressSpace : TypeAttr { - let Spellings = [Keyword<"__generic">, Keyword<"generic">, Clang<"ocl_generic">]; + let Spellings = [Keyword<"__generic">, Keyword<"generic">, Clang<"opencl_generic">]; let Documentation = [OpenCLAddressSpaceGenericDocs]; } @@ -3676,20 +3677,40 @@ def OMPDeclareVariant : InheritableAttr { } // TODO: add printing of real context selectors. OS << " match("; + int Used[OMP_CTX_SET_unknown] = {0}; for (unsigned I = 0, E = ctxSelectorSets_size(); I < E; ++I) { auto CtxSet = static_cast( *std::next(ctxSelectorSets_begin(), I)); - auto Ctx = static_cast( - *std::next(ctxSelectors_begin(), I)); - assert(CtxSet != OMP_CTX_SET_unknown && Ctx != OMP_CTX_unknown && - "Unknown context selector."); + if (Used[CtxSet]) + continue; + if (I > 0) + OS << ","; switch (CtxSet) { case OMP_CTX_SET_implementation: OS << "implementation={"; + break; + case OMP_CTX_SET_device: + OS << "device={"; + break; + case OMP_CTX_SET_unknown: + llvm_unreachable("Unknown context selector set."); + } + Used[CtxSet] = 1; + for (unsigned K = I, EK = ctxSelectors_size(); K < EK; ++K) { + auto CtxSetK = static_cast( + *std::next(ctxSelectorSets_begin(), K)); + if (CtxSet != CtxSetK) + continue; + if (K != I) + OS << ","; + auto Ctx = static_cast( + *std::next(ctxSelectors_begin(), K)); switch (Ctx) { case OMP_CTX_vendor: + assert(CtxSet == OMP_CTX_SET_implementation && + "Expected implementation context selector set."); OS << "vendor("; - printScore(OS, Policy, I); + printScore(OS, Policy, K); if (implVendors_size() > 0) { OS << *implVendors(). 
begin(); for (StringRef VendorName : llvm::drop_begin(implVendors(), 1)) @@ -3698,16 +3719,8 @@ def OMPDeclareVariant : InheritableAttr { OS << ")"; break; case OMP_CTX_kind: - llvm_unreachable("Unexpected context selector in implementation set."); - case OMP_CTX_unknown: - llvm_unreachable("Unknown context selector."); - } - OS << "}"; - break; - case OMP_CTX_SET_device: - OS << "device={"; - switch (Ctx) { - case OMP_CTX_kind: + assert(CtxSet == OMP_CTX_SET_device && + "Expected device context selector set."); OS << "kind("; if (deviceKinds_size() > 0) { OS << *deviceKinds().begin(); @@ -3716,18 +3729,11 @@ def OMPDeclareVariant : InheritableAttr { } OS << ")"; break; - case OMP_CTX_vendor: - llvm_unreachable("Unexpected context selector in device set."); case OMP_CTX_unknown: llvm_unreachable("Unknown context selector."); } - OS << "}"; - break; - case OMP_CTX_SET_unknown: - llvm_unreachable("Unknown context selector set."); } - if (I != E - 1) - OS << ","; + OS << "}"; } OS << ")"; } diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 933dc24e97bf0..aabb5466314aa 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -149,8 +149,8 @@ def err_drv_missing_arg_mtp : Error< "missing argument to '%0'">; def err_drv_invalid_libcxx_deployment : Error< "invalid deployment target for -stdlib=libc++ (requires %0 or later)">; -def err_drv_invalid_argument_to_fdebug_prefix_map : Error< - "invalid argument '%0' to -fdebug-prefix-map">; +def err_drv_invalid_argument_to_option : Error< + "invalid argument '%0' to -%1">; def err_drv_malformed_sanitizer_blacklist : Error< "malformed sanitizer blacklist: '%0'">; def err_drv_duplicate_config : Error< @@ -454,6 +454,10 @@ def warn_drv_experimental_isel_incomplete_opt : Warning< "-fexperimental-isel support is incomplete for this architecture at the current optimization level">, InGroup; +def warn_drv_experimental_fp_control_incomplete_opt : Warning< + "Support for floating point control option %0 is incomplete and experimental">, + InGroup; + def warn_drv_moutline_unsupported_opt : Warning< "The '%0' architecture does not support -moutline; flag ignored">, InGroup; @@ -477,10 +481,6 @@ def warn_drv_msp430_hwmult_no_device : Warning<"no MCU device specified, but " "specify a MSP430 device, or -mhwmult to set hardware multiply type " "explicitly.">, InGroup; -// Frame pointer reservation. 
-def err_reserved_frame_pointer : Error< - "'%0' has been specified but '%1' is used as the frame pointer for this target">; - def warn_drv_libstdcxx_not_found : Warning< "include path for libstdc++ headers not found; pass '-stdlib=libc++' on the " "command line to use the libc++ standard library instead">, diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 31307b6aaf5fe..d8669c0336831 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -128,6 +128,8 @@ def CXX11CompatDeprecatedWritableStr : def DeprecatedAttributes : DiagGroup<"deprecated-attributes">; def DeprecatedCommaSubscript : DiagGroup<"deprecated-comma-subscript">; +def DeprecatedCopy : DiagGroup<"deprecated-copy">; +def DeprecatedCopyDtor : DiagGroup<"deprecated-copy-dtor">; def DeprecatedDeclarations : DiagGroup<"deprecated-declarations">; def UnavailableDeclarations : DiagGroup<"unavailable-declarations">; def UnguardedAvailabilityNew : DiagGroup<"unguarded-availability-new">; @@ -147,6 +149,8 @@ def DeprecatedWritableStr : DiagGroup<"deprecated-writable-strings", // FIXME: Why is DeprecatedImplementations not in this group? def Deprecated : DiagGroup<"deprecated", [DeprecatedAttributes, DeprecatedCommaSubscript, + DeprecatedCopy, + DeprecatedCopyDtor, DeprecatedDeclarations, DeprecatedDynamicExceptionSpec, DeprecatedIncrementBool, @@ -689,6 +693,7 @@ def ZeroLengthArray : DiagGroup<"zero-length-array">; def GNUZeroLineDirective : DiagGroup<"gnu-zero-line-directive">; def GNUZeroVariadicMacroArguments : DiagGroup<"gnu-zero-variadic-macro-arguments">; def Fallback : DiagGroup<"fallback">; +def MisleadingIndentation : DiagGroup<"misleading-indentation">; // This covers both the deprecated case (in C++98) // and the extension case (in C++11 onwards). @@ -812,6 +817,7 @@ def Move : DiagGroup<"move", [ ]>; def Extra : DiagGroup<"extra", [ + DeprecatedCopy, MissingFieldInitializers, IgnoredQualifiers, InitializerOverrides, @@ -879,7 +885,7 @@ def Consumed : DiagGroup<"consumed">; // Note that putting warnings in -Wall will not disable them by default. If a // warning should be active _only_ when -Wall is passed in, mark it as // DefaultIgnore in addition to putting it here. -def All : DiagGroup<"all", [Most, Parentheses, Switch, SwitchBool]>; +def All : DiagGroup<"all", [Most, Parentheses, Switch, SwitchBool, MisleadingIndentation]>; // Warnings that should be in clang-cl /w4. def : DiagGroup<"CL4", [All, Extra]>; @@ -1107,6 +1113,9 @@ def SpirCompat : DiagGroup<"spir-compat">; // Warning for the experimental-isel options. def ExperimentalISel : DiagGroup<"experimental-isel">; +// Warning for the experimental float control options. +def ExperimentalFloatControl : DiagGroup<"experimental-float-control">; + // A warning group specifically for warnings related to function // multiversioning. def FunctionMultiVersioning : DiagGroup<"function-multiversion">; @@ -1118,9 +1127,6 @@ def CrossTU : DiagGroup<"ctu">; def CTADMaybeUnsupported : DiagGroup<"ctad-maybe-unsupported">; -def FortifySource : DiagGroup<"fortify-source">; - def IntelFPGA : DiagGroup<"intel-fpga">; -// Register reservation. 
-def FixedRegs : DiagGroup<"fixed-registers">; +def FortifySource : DiagGroup<"fortify-source">; diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index fe0ad6ed14786..3d57942443817 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -61,6 +61,13 @@ def warn_null_statement : Warning< "remove unnecessary ';' to silence this warning">, InGroup, DefaultIgnore; +def warn_misleading_indentation : Warning< + "misleading indentation; statement is not part of " + "the previous '%select{if|else|for|while|else if}0'">, + InGroup, DefaultIgnore; +def note_previous_statement : Note< + "previous statement is here">; + def ext_thread_before : Extension<"'__thread' before '%0'">; def ext_keyword_as_ident : ExtWarn< "keyword '%0' will be made available as an identifier " diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 143cd137336f1..3e9e163b059ff 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -573,9 +573,13 @@ def err_access_decl : Error< "use using declarations instead">; def warn_deprecated_copy_operation : Warning< "definition of implicit copy %select{constructor|assignment operator}1 " - "for %0 is deprecated because it has a user-declared " - "%select{copy %select{assignment operator|constructor}1|destructor}2">, - InGroup, DefaultIgnore; + "for %0 is deprecated because it has a user-declared copy " + "%select{assignment operator|constructor}1">, + InGroup, DefaultIgnore; +def warn_deprecated_copy_dtor_operation : Warning< + "definition of implicit copy %select{constructor|assignment operator}1 " + "for %0 is deprecated because it has a user-declared destructor">, + InGroup, DefaultIgnore; def warn_cxx17_compat_exception_spec_in_signature : Warning< "mangled name of %0 will change in C++17 due to non-throwing exception " "specification in function signature">, InGroup; @@ -7771,8 +7775,6 @@ let CategoryName = "Inline Assembly Issue" in { def err_asm_unknown_register_name : Error<"unknown register name '%0' in asm">; def err_asm_invalid_global_var_reg : Error<"register '%0' unsuitable for " "global register variables on this target">; - def err_asm_missing_fixed_reg_opt : Error<"-ffixed-%0 is required for " - "global named register variable declaration">; def err_asm_register_size_mismatch : Error<"size of register '%0' does not " "match variable size">; def err_asm_bad_register_type : Error<"bad type for named register variable">; @@ -8770,6 +8772,12 @@ def err_32_bit_builtin_64_bit_tgt : Error< "this builtin is only available on 32-bit targets">; def err_builtin_x64_aarch64_only : Error< "this builtin is only available on x86-64 and aarch64 targets">; +def err_mips_builtin_requires_dsp : Error< + "this builtin requires 'dsp' ASE, please use -mdsp">; +def err_mips_builtin_requires_dspr2 : Error< + "this builtin requires 'dsp r2' ASE, please use -mdspr2">; +def err_mips_builtin_requires_msa : Error< + "this builtin requires 'msa' ASE, please use -mmsa">; def err_ppc_builtin_only_on_pwr7 : Error< "this builtin is only valid on POWER7 or later CPUs">; def err_x86_builtin_invalid_rounding : Error< @@ -9320,7 +9328,7 @@ def ext_omp_loop_not_canonical_init : ExtWarn< "('var = init' or 'T var = init')">, InGroup; def err_omp_loop_not_canonical_cond : Error< "condition of OpenMP for loop must be a relational comparison " - "('<', 
'<=', '>', %select{or '>='|'>=', or '!='}0) of loop variable %1">; + "('<', '<=', '>', %select{or '>='|'>=', or '!='}0) of loop variable %1">; def err_omp_loop_not_canonical_incr : Error< "increment clause of OpenMP for loop must perform simple addition " "or subtraction on loop variable %0">; diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 01759f45b227a..dad63a0088485 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -259,6 +259,8 @@ LANGOPT(SinglePrecisionConstants , 1, 0, "treating double-precision floating poi LANGOPT(FastRelaxedMath , 1, 0, "OpenCL fast relaxed math") /// FP_CONTRACT mode (on/off/fast). ENUM_LANGOPT(DefaultFPContractMode, FPContractModeKind, 2, FPC_Off, "FP contraction type") +ENUM_LANGOPT(FPRoundingMode, FPRoundingModeKind, 3, FPR_ToNearest, "FP Rounding Mode type") +ENUM_LANGOPT(FPExceptionMode, FPExceptionModeKind, 2, FPE_Ignore, "FP Exception Behavior Mode type") LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment") LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility") LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting") @@ -301,8 +303,6 @@ BENIGN_LANGOPT(ConstexprStepLimit, 32, 1048576, "maximum constexpr evaluation steps") BENIGN_LANGOPT(EnableNewConstInterp, 1, 0, "enable the experimental new constant interpreter") -BENIGN_LANGOPT(ForceNewConstInterp, 1, 0, - "force the use of the experimental new constant interpreter") BENIGN_LANGOPT(BracketDepth, 32, 256, "maximum bracket nesting depth") BENIGN_LANGOPT(NumLargeByValueCopy, 32, 0, diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 76592df20ddba..e09c3881dc5dc 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -195,6 +195,34 @@ class LangOptions : public LangOptionsBase { FEA_On }; + // Values of the following enumerations correspond to metadata arguments + // specified for constrained floating-point intrinsics: + // http://llvm.org/docs/LangRef.html#constrained-floating-point-intrinsics. + + /// Possible rounding modes. + enum FPRoundingModeKind { + /// Rounding to nearest, corresponds to "round.tonearest". + FPR_ToNearest, + /// Rounding toward -Inf, corresponds to "round.downward". + FPR_Downward, + /// Rounding toward +Inf, corresponds to "round.upward". + FPR_Upward, + /// Rounding toward zero, corresponds to "round.towardzero". + FPR_TowardZero, + /// Is determined by runtime environment, corresponds to "round.dynamic". + FPR_Dynamic + }; + + /// Possible floating point exception behavior. + enum FPExceptionModeKind { + /// Assume that floating-point exceptions are masked. + FPE_Ignore, + /// Transformations do not cause new exceptions but may hide some. + FPE_MayTrap, + /// Strictly preserve the floating-point exception semantics. + FPE_Strict + }; + enum class LaxVectorConversionKind { /// Permit no implicit vector bitcasts. None, diff --git a/clang/include/clang/Basic/TargetCXXABI.h b/clang/include/clang/Basic/TargetCXXABI.h index b1be40272513b..1ab45d2ce9a1e 100644 --- a/clang/include/clang/Basic/TargetCXXABI.h +++ b/clang/include/clang/Basic/TargetCXXABI.h @@ -103,6 +103,12 @@ class TargetCXXABI { /// of these details is necessarily final yet. WebAssembly, + /// The Fuchsia ABI is a modified version of the Itanium ABI. 
+ /// + /// The relevant changes from the Itanium ABI are: + /// - constructors and destructors return 'this', as in ARM. + Fuchsia, + /// The Microsoft ABI is the ABI used by Microsoft Visual Studio (and /// compatible compilers). /// @@ -133,6 +139,7 @@ class TargetCXXABI { /// Does this ABI generally fall into the Itanium family of ABIs? bool isItaniumFamily() const { switch (getKind()) { + case Fuchsia: case GenericAArch64: case GenericItanium: case GenericARM: @@ -152,6 +159,7 @@ class TargetCXXABI { /// Is this ABI an MSVC-compatible ABI? bool isMicrosoft() const { switch (getKind()) { + case Fuchsia: case GenericAArch64: case GenericItanium: case GenericARM: @@ -182,6 +190,7 @@ class TargetCXXABI { case WebAssembly: // WebAssembly doesn't require any special alignment for member functions. return false; + case Fuchsia: case GenericARM: case GenericAArch64: case GenericMIPS: @@ -257,6 +266,7 @@ class TargetCXXABI { /// done on a generic Itanium platform. bool canKeyFunctionBeInline() const { switch (getKind()) { + case Fuchsia: case GenericARM: case iOS64: case WebAssembly: @@ -277,27 +287,18 @@ class TargetCXXABI { /// padding of a base class? /// /// This decision cannot be changed without breaking platform ABI - /// compatibility, and yet it is tied to language guarantees which - /// the committee has so far seen fit to strengthen no less than - /// three separate times: - /// - originally, there were no restrictions at all; - /// - C++98 declared that objects could not be allocated in the - /// tail padding of a POD type; - /// - C++03 extended the definition of POD to include classes - /// containing member pointers; and - /// - C++11 greatly broadened the definition of POD to include - /// all trivial standard-layout classes. - /// Each of these changes technically took several existing - /// platforms and made them permanently non-conformant. + /// compatibility. In ISO C++98, tail padding reuse was only permitted for + /// non-POD base classes, but that restriction was removed retroactively by + /// DR 43, and tail padding reuse is always permitted in all de facto C++ + /// language modes. However, many platforms use a variant of the old C++98 + /// rule for compatibility. enum TailPaddingUseRules { /// The tail-padding of a base class is always theoretically - /// available, even if it's POD. This is not strictly conforming - /// in any language mode. + /// available, even if it's POD. AlwaysUseTailPadding, /// Only allocate objects in the tail padding of a base class if /// the base class is not POD according to the rules of C++ TR1. - /// This is non-strictly conforming in C++11 mode. UseTailPaddingUnlessPOD03, /// Only allocate objects in the tail padding of a base class if @@ -318,6 +319,7 @@ class TargetCXXABI { // iOS on ARM64 and WebAssembly use the C++11 POD rules. They do not honor // the Itanium exception about classes with over-large bitfields. + case Fuchsia: case iOS64: case WebAssembly: case WatchOS: diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index cc83f4c34c145..33cecdadc686c 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -938,12 +938,6 @@ class TargetInfo : public virtual TransferrableTargetInfo, return true; } - /// Check if the register is reserved globally - /// - /// This function returns true if the register passed in RegName is reserved - /// using the corresponding -ffixed-RegName option. 
- virtual bool isRegisterReservedGlobally(StringRef) const { return true; } - // validateOutputConstraint, validateInputConstraint - Checks that // a constraint is valid and provides information about it. // FIXME: These should return a real error instead of just true/false. diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td index bb9873efac853..79cd16233c104 100644 --- a/clang/include/clang/Basic/arm_fp16.td +++ b/clang/include/clang/Basic/arm_fp16.td @@ -17,118 +17,118 @@ include "arm_neon_incl.td" let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in { // Negate - def VNEGSH : SInst<"vneg", "ss", "Sh">; + def VNEGSH : SInst<"vneg", "11", "Sh">; // Reciprocal/Sqrt - def SCALAR_FRECPSH : IInst<"vrecps", "sss", "Sh">; - def FSQRTSH : SInst<"vsqrt", "ss", "Sh">; - def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">; + def SCALAR_FRECPSH : IInst<"vrecps", "111", "Sh">; + def FSQRTSH : SInst<"vsqrt", "11", "Sh">; + def SCALAR_FRSQRTSH : IInst<"vrsqrts", "111", "Sh">; // Reciprocal Estimate - def SCALAR_FRECPEH : IInst<"vrecpe", "ss", "Sh">; + def SCALAR_FRECPEH : IInst<"vrecpe", "11", "Sh">; // Reciprocal Exponent - def SCALAR_FRECPXH : IInst<"vrecpx", "ss", "Sh">; + def SCALAR_FRECPXH : IInst<"vrecpx", "11", "Sh">; // Reciprocal Square Root Estimate - def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">; + def SCALAR_FRSQRTEH : IInst<"vrsqrte", "11", "Sh">; // Rounding - def FRINTZ_S64H : SInst<"vrnd", "ss", "Sh">; - def FRINTA_S64H : SInst<"vrnda", "ss", "Sh">; - def FRINTI_S64H : SInst<"vrndi", "ss", "Sh">; - def FRINTM_S64H : SInst<"vrndm", "ss", "Sh">; - def FRINTN_S64H : SInst<"vrndn", "ss", "Sh">; - def FRINTP_S64H : SInst<"vrndp", "ss", "Sh">; - def FRINTX_S64H : SInst<"vrndx", "ss", "Sh">; + def FRINTZ_S64H : SInst<"vrnd", "11", "Sh">; + def FRINTA_S64H : SInst<"vrnda", "11", "Sh">; + def FRINTI_S64H : SInst<"vrndi", "11", "Sh">; + def FRINTM_S64H : SInst<"vrndm", "11", "Sh">; + def FRINTN_S64H : SInst<"vrndn", "11", "Sh">; + def FRINTP_S64H : SInst<"vrndp", "11", "Sh">; + def FRINTX_S64H : SInst<"vrndx", "11", "Sh">; // Conversion - def SCALAR_SCVTFSH : SInst<"vcvth_f16", "Ys", "sUs">; - def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "Ys", "iUi">; - def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "Ys", "lUl">; - def SCALAR_FCVTZSH : SInst<"vcvt_s16", "$s", "Sh">; - def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">; - def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">; - def SCALAR_FCVTZUH : SInst<"vcvt_u16", "bs", "Sh">; - def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">; - def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">; - def SCALAR_FCVTASH : SInst<"vcvta_s16", "$s", "Sh">; - def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">; - def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">; - def SCALAR_FCVTAUH : SInst<"vcvta_u16", "bs", "Sh">; - def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">; - def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">; - def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "$s", "Sh">; - def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">; - def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">; - def SCALAR_FCVTMUH : SInst<"vcvtm_u16", "bs", "Sh">; - def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">; - def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">; - def SCALAR_FCVTNSH : SInst<"vcvtn_s16", "$s", "Sh">; - def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">; - def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">; - def SCALAR_FCVTNUH : SInst<"vcvtn_u16", "bs", "Sh">; - def 
SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">; - def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">; - def SCALAR_FCVTPSH : SInst<"vcvtp_s16", "$s", "Sh">; - def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">; - def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">; - def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "bs", "Sh">; - def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">; - def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">; + def SCALAR_SCVTFSH : SInst<"vcvth_f16", "(1F)(1!)", "sUs">; + def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "(1F<)(1!)", "iUi">; + def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "(1F<<)(1!)", "lUl">; + def SCALAR_FCVTZSH : SInst<"vcvt_s16", "(1S)1", "Sh">; + def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTZUH : SInst<"vcvt_u16", "(1U)1", "Sh">; + def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTASH : SInst<"vcvta_s16", "(1S)1", "Sh">; + def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTAUH : SInst<"vcvta_u16", "(1U)1", "Sh">; + def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "(1S)1", "Sh">; + def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTMUH : SInst<"vcvtm_u16", "(1U)1", "Sh">; + def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTNSH : SInst<"vcvtn_s16", "(1S)1", "Sh">; + def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTNUH : SInst<"vcvtn_u16", "(1U)1", "Sh">; + def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTPSH : SInst<"vcvtp_s16", "(1S)1", "Sh">; + def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "(1U)1", "Sh">; + def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">; let isVCVT_N = 1 in { - def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "sUs">; - def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "Ysi", "iUi">; - def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "Ysi", "lUl">; - def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">; - def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">; - def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">; - def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">; - def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">; - def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">; + def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">; + def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">; + def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">; + def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh">; + def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh">; + def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh">; + def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh">; + def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh">; + def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh">; } // Comparison 
- def SCALAR_CMEQRH : SInst<"vceq", "bss", "Sh">; - def SCALAR_CMEQZH : SInst<"vceqz", "bs", "Sh">; - def SCALAR_CMGERH : SInst<"vcge", "bss", "Sh">; - def SCALAR_CMGEZH : SInst<"vcgez", "bs", "Sh">; - def SCALAR_CMGTRH : SInst<"vcgt", "bss", "Sh">; - def SCALAR_CMGTZH : SInst<"vcgtz", "bs", "Sh">; - def SCALAR_CMLERH : SInst<"vcle", "bss", "Sh">; - def SCALAR_CMLEZH : SInst<"vclez", "bs", "Sh">; - def SCALAR_CMLTH : SInst<"vclt", "bss", "Sh">; - def SCALAR_CMLTZH : SInst<"vcltz", "bs", "Sh">; + def SCALAR_CMEQRH : SInst<"vceq", "(1U)11", "Sh">; + def SCALAR_CMEQZH : SInst<"vceqz", "(1U)1", "Sh">; + def SCALAR_CMGERH : SInst<"vcge", "(1U)11", "Sh">; + def SCALAR_CMGEZH : SInst<"vcgez", "(1U)1", "Sh">; + def SCALAR_CMGTRH : SInst<"vcgt", "(1U)11", "Sh">; + def SCALAR_CMGTZH : SInst<"vcgtz", "(1U)1", "Sh">; + def SCALAR_CMLERH : SInst<"vcle", "(1U)11", "Sh">; + def SCALAR_CMLEZH : SInst<"vclez", "(1U)1", "Sh">; + def SCALAR_CMLTH : SInst<"vclt", "(1U)11", "Sh">; + def SCALAR_CMLTZH : SInst<"vcltz", "(1U)1", "Sh">; // Absolute Compare Mask Greater Than Or Equal - def SCALAR_FACGEH : IInst<"vcage", "bss", "Sh">; - def SCALAR_FACLEH : IInst<"vcale", "bss", "Sh">; + def SCALAR_FACGEH : IInst<"vcage", "(1U)11", "Sh">; + def SCALAR_FACLEH : IInst<"vcale", "(1U)11", "Sh">; // Absolute Compare Mask Greater Than - def SCALAR_FACGT : IInst<"vcagt", "bss", "Sh">; - def SCALAR_FACLT : IInst<"vcalt", "bss", "Sh">; + def SCALAR_FACGT : IInst<"vcagt", "(1U)11", "Sh">; + def SCALAR_FACLT : IInst<"vcalt", "(1U)11", "Sh">; // Scalar Absolute Value - def SCALAR_ABSH : SInst<"vabs", "ss", "Sh">; + def SCALAR_ABSH : SInst<"vabs", "11", "Sh">; // Scalar Absolute Difference - def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">; + def SCALAR_ABDH: IInst<"vabd", "111", "Sh">; // Add/Sub - def VADDSH : SInst<"vadd", "sss", "Sh">; - def VSUBHS : SInst<"vsub", "sss", "Sh">; + def VADDSH : SInst<"vadd", "111", "Sh">; + def VSUBHS : SInst<"vsub", "111", "Sh">; // Max/Min - def VMAXHS : SInst<"vmax", "sss", "Sh">; - def VMINHS : SInst<"vmin", "sss", "Sh">; - def FMAXNMHS : SInst<"vmaxnm", "sss", "Sh">; - def FMINNMHS : SInst<"vminnm", "sss", "Sh">; + def VMAXHS : SInst<"vmax", "111", "Sh">; + def VMINHS : SInst<"vmin", "111", "Sh">; + def FMAXNMHS : SInst<"vmaxnm", "111", "Sh">; + def FMINNMHS : SInst<"vminnm", "111", "Sh">; // Multiplication/Division - def VMULHS : SInst<"vmul", "sss", "Sh">; - def MULXHS : SInst<"vmulx", "sss", "Sh">; - def FDIVHS : SInst<"vdiv", "sss", "Sh">; + def VMULHS : SInst<"vmul", "111", "Sh">; + def MULXHS : SInst<"vmulx", "111", "Sh">; + def FDIVHS : SInst<"vdiv", "111", "Sh">; // Vector fused multiply-add operations - def VFMAHS : SInst<"vfma", "ssss", "Sh">; - def VFMSHS : SInst<"vfms", "ssss", "Sh">; + def VFMAHS : SInst<"vfma", "1111", "Sh">; + def VFMSHS : SInst<"vfms", "1111", "Sh">; } diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index d8d199f464d93..5fa9fc008202b 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -28,25 +28,111 @@ foreach n = [ 2, 4 ] in { "Intrinsic::arm_mve_vld"#n#"q":$IRIntr)>; } +multiclass bit_op_fp { +def "": Intrinsic; +} + +multiclass bit_op_fp_with_inv { +def "": Intrinsic; +} + let params = T.Int in { def vaddq: Intrinsic; +def vandq: Intrinsic; +def vbicq: Intrinsic; +def veorq: Intrinsic; +def vornq: Intrinsic; +def vorrq: Intrinsic; def vsubq: Intrinsic; +def vmulq: Intrinsic; +def vmulhq: Intrinsic $a, $b)>; +def vrmulhq: Intrinsic $a, $b)>; } let params = T.Float in { def 
vaddqf: Intrinsic, NameOverride<"vaddq">; +defm vandqf: bit_op_fp, NameOverride<"vandq">; +defm vbicqf: bit_op_fp_with_inv, NameOverride<"vbicq">; +defm veorqf: bit_op_fp, NameOverride<"veorq">; +defm vornqf: bit_op_fp_with_inv, NameOverride<"vornq">; +defm vorrqf: bit_op_fp, NameOverride<"vorrq">; def vsubqf: Intrinsic, NameOverride<"vsubq">; +def vmulqf: Intrinsic, + NameOverride<"vmulq">; +} + +// The bitcasting below is not overcomplicating the IR because while +// Vector and UVector may be different vector types at the C level i.e. +// vectors of same size signed/unsigned ints. Once they're lowered +// to IR, they are just bit vectors with no sign at all, so the +// bitcasts will be automatically elided by IRBuilder. +multiclass predicated_bit_op_fp { +def "": Intrinsic + (bitcast $a, UVector), + (bitcast $b, UVector), + $pred, + (bitcast $inactive, UVector)), Vector)>; +} + +// Plain intrinsics +let params = T.Usual in { +def vabdq: Intrinsic $a, $b)>; } +// Predicated intrinsics let params = T.Usual in { +def vabdq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"abd_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; def vaddq_m: Intrinsic< Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), (IRInt<"add_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; def vsubq_m: Intrinsic< Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), (IRInt<"sub_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +def vmulq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"mul_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +defm vandq_m: predicated_bit_op_fp<"and_predicated">; +defm vbicq_m: predicated_bit_op_fp<"bic_predicated">; +defm veorq_m: predicated_bit_op_fp<"eor_predicated">; +defm vornq_m: predicated_bit_op_fp<"orn_predicated">; +defm vorrq_m: predicated_bit_op_fp<"orr_predicated">; +} + +// Predicated intrinsics - Int types only +let params = T.Int in { +def vminq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"min_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +def vmaxq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"max_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +def vmulhq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"mulh_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +def vrmulhq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"rmulh_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +} + +// Predicated intrinsics - Float types only +let params = T.Float in { +def vminnmq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"min_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +def vmaxnmq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"max_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; } let params = T.Int in { @@ -117,6 +203,54 @@ let params = T.Float in { defm: compare<"le", fcmp_le>; } +let params = T.Signed in { + def vminq: Intrinsic; + def vmaxq: Intrinsic; +} +let params = T.Unsigned in { + def vminqu: Intrinsic, + NameOverride<"vminq">; + def vmaxqu: Intrinsic, + NameOverride<"vmaxq">; +} +let params = T.Float in { + def 
vminnmq: Intrinsic $a, $b)>; + def vmaxnmq: Intrinsic $a, $b)>; +} + +def vpselq: Intrinsic { let params = T.Usual; } +def vpselq_64: Intrinsic< + Vector, (args Vector:$t, Vector:$f, PredOf:$pred), + (bitcast (select $pred, (bitcast $t, VecOf), + (bitcast $f, VecOf)), Vector)>, + NameOverride<"vpselq"> { let params = T.All64; } + +let params = [Void], pnt = PNT_None in { + + multiclass vctp { + def "": Intrinsic (IRIntBase $val)))>; + def _m: Intrinsic (and $inpred, + (IRIntBase $val))))>; + } + defm vctp8q: vctp, "arm_mve_vctp8">; + defm vctp16q: vctp, "arm_mve_vctp16">; + defm vctp32q: vctp, "arm_mve_vctp32">; + defm vctp64q: vctp, "arm_mve_vctp64">; + + def vpnot: Intrinsic, (args unpromoted>:$pred), + (xor $pred, (u16 65535))>; + +} + multiclass contiguous_load same_size, list wider> { // Intrinsics named with explicit memory and element sizes that match: diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 27cdada02ec4f..d837a1d33d000 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -58,12 +58,16 @@ class CGHelperFn : IRBuilderBase { let prefix = func # "(Builder, "; } def add: IRBuilder<"CreateAdd">; +def mul: IRBuilder<"CreateMul">; +def not: IRBuilder<"CreateNot">; def or: IRBuilder<"CreateOr">; def and: IRBuilder<"CreateAnd">; +def xor: IRBuilder<"CreateXor">; def sub: IRBuilder<"CreateSub">; def shl: IRBuilder<"CreateShl">; def lshr: IRBuilder<"CreateLShr">; def fadd: IRBuilder<"CreateFAdd">; +def fmul: IRBuilder<"CreateFMul">; def fsub: IRBuilder<"CreateFSub">; def load: IRBuilder<"CreateLoad"> { let special_params = [IRBuilderAddrParam<0>]; @@ -103,6 +107,7 @@ def fcmp_ge: IRBuilder<"CreateFCmpOGE">; def fcmp_lt: IRBuilder<"CreateFCmpOLT">; def fcmp_le: IRBuilder<"CreateFCmpOLE">; def splat: CGHelperFn<"ARMMVEVectorSplat">; +def select: IRBuilder<"CreateSelect">; // A node that makes an Address out of a pointer-typed Value, by // providing an alignment as the second argument. 
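A minimal hand-written C++ sketch (assuming a recent LLVM; this is not the code MveEmitter actually generates, and the function and variable names are mine) of the (bitcast (select ...)) pattern used by vpselq above. It shows why the bitcasts cost nothing once Vector and UVector lower to the same IR type: IRBuilder::CreateBitCast simply returns its operand when source and destination types are identical, which is what the "automatically elided by IRBuilder" comment above relies on.

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/Verifier.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    Module M("vpselq-sketch", Ctx);
    auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
    auto *PredTy = FixedVectorType::get(Type::getInt1Ty(Ctx), 4);
    auto *FnTy = FunctionType::get(VecTy, {PredTy, VecTy, VecTy}, false);
    Function *F =
        Function::Create(FnTy, Function::ExternalLinkage, "vpselq_sketch", M);
    IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

    Value *Pred = F->getArg(0), *T = F->getArg(1), *Fv = F->getArg(2);
    // Signed and unsigned vectors are both <4 x i32> at the IR level, so
    // these "bitcasts" fold to their operands: no instruction is emitted.
    Value *TU = B.CreateBitCast(T, VecTy);
    Value *FU = B.CreateBitCast(Fv, VecTy);
    // This is the 'select' IRBuilder binding defined above (CreateSelect).
    Value *Sel = B.CreateSelect(Pred, TU, FU);
    B.CreateRet(B.CreateBitCast(Sel, VecTy));

    M.print(outs(), nullptr); // only the select and ret survive
    return verifyModule(M, &errs()); // nonzero if the module is malformed
  }

Printing the module confirms that only the select and the return remain; the two inner CreateBitCast calls never materialize as instructions.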
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 127c5af97ce67..a4dc21b643110 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -109,7 +109,8 @@ def OP_OR : Op<(op "|", $p0, $p1)>; def OP_XOR : Op<(op "^", $p0, $p1)>; def OP_ANDN : Op<(op "&", $p0, (op "~", $p1))>; def OP_ORN : Op<(op "|", $p0, (op "~", $p1))>; -def OP_CAST : Op<(cast "R", $p0)>; +def OP_CAST : LOp<[(save_temp $promote, $p0), + (cast "R", $promote)]>; def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>; def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>; def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>; @@ -226,240 +227,240 @@ def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1, //////////////////////////////////////////////////////////////////////////////// // E.3.1 Addition -def VADD : IOpInst<"vadd", "ddd", +def VADD : IOpInst<"vadd", "...", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_ADD>; -def VADDL : SOpInst<"vaddl", "wdd", "csiUcUsUi", OP_ADDL>; -def VADDW : SOpInst<"vaddw", "wwd", "csiUcUsUi", OP_ADDW>; -def VHADD : SInst<"vhadd", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VRHADD : SInst<"vrhadd", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VQADD : SInst<"vqadd", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VADDHN : IInst<"vaddhn", "hkk", "silUsUiUl">; -def VRADDHN : IInst<"vraddhn", "hkk", "silUsUiUl">; +def VADDL : SOpInst<"vaddl", "(>Q)..", "csiUcUsUi", OP_ADDL>; +def VADDW : SOpInst<"vaddw", "(>Q)(>Q).", "csiUcUsUi", OP_ADDW>; +def VHADD : SInst<"vhadd", "...", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VRHADD : SInst<"vrhadd", "...", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VQADD : SInst<"vqadd", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VADDHN : IInst<"vaddhn", "; +def VRADDHN : IInst<"vraddhn", "; //////////////////////////////////////////////////////////////////////////////// // E.3.2 Multiplication -def VMUL : IOpInst<"vmul", "ddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MUL>; -def VMULP : SInst<"vmul", "ddd", "PcQPc">; -def VMLA : IOpInst<"vmla", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLA>; -def VMLAL : SOpInst<"vmlal", "wwdd", "csiUcUsUi", OP_MLAL>; -def VMLS : IOpInst<"vmls", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLS>; -def VMLSL : SOpInst<"vmlsl", "wwdd", "csiUcUsUi", OP_MLSL>; -def VQDMULH : SInst<"vqdmulh", "ddd", "siQsQi">; -def VQRDMULH : SInst<"vqrdmulh", "ddd", "siQsQi">; +def VMUL : IOpInst<"vmul", "...", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MUL>; +def VMULP : SInst<"vmul", "...", "PcQPc">; +def VMLA : IOpInst<"vmla", "....", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLA>; +def VMLAL : SOpInst<"vmlal", "(>Q)(>Q)..", "csiUcUsUi", OP_MLAL>; +def VMLS : IOpInst<"vmls", "....", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLS>; +def VMLSL : SOpInst<"vmlsl", "(>Q)(>Q)..", "csiUcUsUi", OP_MLSL>; +def VQDMULH : SInst<"vqdmulh", "...", "siQsQi">; +def VQRDMULH : SInst<"vqrdmulh", "...", "siQsQi">; let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in { -def VQRDMLAH : SOpInst<"vqrdmlah", "dddd", "siQsQi", OP_QRDMLAH>; -def VQRDMLSH : SOpInst<"vqrdmlsh", "dddd", "siQsQi", OP_QRDMLSH>; +def VQRDMLAH : SOpInst<"vqrdmlah", "....", "siQsQi", OP_QRDMLAH>; +def VQRDMLSH : SOpInst<"vqrdmlsh", "....", "siQsQi", OP_QRDMLSH>; } -def VQDMLAL : SInst<"vqdmlal", "wwdd", "si">; -def VQDMLSL : SInst<"vqdmlsl", "wwdd", "si">; -def VMULL : SInst<"vmull", "wdd", "csiUcUsUiPc">; -def VQDMULL : SInst<"vqdmull", "wdd", "si">; +def VQDMLAL : SInst<"vqdmlal", "(>Q)(>Q)..", "si">; +def VQDMLSL : SInst<"vqdmlsl", "(>Q)(>Q)..", "si">; +def VMULL : 
SInst<"vmull", "(>Q)..", "csiUcUsUiPc">; +def VQDMULL : SInst<"vqdmull", "(>Q)..", "si">; //////////////////////////////////////////////////////////////////////////////// // E.3.3 Subtraction -def VSUB : IOpInst<"vsub", "ddd", +def VSUB : IOpInst<"vsub", "...", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_SUB>; -def VSUBL : SOpInst<"vsubl", "wdd", "csiUcUsUi", OP_SUBL>; -def VSUBW : SOpInst<"vsubw", "wwd", "csiUcUsUi", OP_SUBW>; -def VQSUB : SInst<"vqsub", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VHSUB : SInst<"vhsub", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VSUBHN : IInst<"vsubhn", "hkk", "silUsUiUl">; -def VRSUBHN : IInst<"vrsubhn", "hkk", "silUsUiUl">; +def VSUBL : SOpInst<"vsubl", "(>Q)..", "csiUcUsUi", OP_SUBL>; +def VSUBW : SOpInst<"vsubw", "(>Q)(>Q).", "csiUcUsUi", OP_SUBW>; +def VQSUB : SInst<"vqsub", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VHSUB : SInst<"vhsub", "...", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VSUBHN : IInst<"vsubhn", "; +def VRSUBHN : IInst<"vrsubhn", "; //////////////////////////////////////////////////////////////////////////////// // E.3.4 Comparison -def VCEQ : IOpInst<"vceq", "udd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>; -def VCGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>; +def VCEQ : IOpInst<"vceq", "U..", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>; +def VCGE : SOpInst<"vcge", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>; let InstName = "vcge" in -def VCLE : SOpInst<"vcle", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>; -def VCGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>; +def VCLE : SOpInst<"vcle", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>; +def VCGT : SOpInst<"vcgt", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>; let InstName = "vcgt" in -def VCLT : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>; +def VCLT : SOpInst<"vclt", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>; let InstName = "vacge" in { -def VCAGE : IInst<"vcage", "udd", "fQf">; -def VCALE : IInst<"vcale", "udd", "fQf">; +def VCAGE : IInst<"vcage", "U..", "fQf">; +def VCALE : IInst<"vcale", "U..", "fQf">; } let InstName = "vacgt" in { -def VCAGT : IInst<"vcagt", "udd", "fQf">; -def VCALT : IInst<"vcalt", "udd", "fQf">; +def VCAGT : IInst<"vcagt", "U..", "fQf">; +def VCALT : IInst<"vcalt", "U..", "fQf">; } -def VTST : WInst<"vtst", "udd", "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPs">; +def VTST : WInst<"vtst", "U..", "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.5 Absolute Difference -def VABD : SInst<"vabd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; -def VABDL : SOpInst<"vabdl", "wdd", "csiUcUsUi", OP_ABDL>; -def VABA : SOpInst<"vaba", "dddd", "csiUcUsUiQcQsQiQUcQUsQUi", OP_ABA>; -def VABAL : SOpInst<"vabal", "wwdd", "csiUcUsUi", OP_ABAL>; +def VABD : SInst<"vabd", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; +def VABDL : SOpInst<"vabdl", "(>Q)..", "csiUcUsUi", OP_ABDL>; +def VABA : SOpInst<"vaba", "....", "csiUcUsUiQcQsQiQUcQUsQUi", OP_ABA>; +def VABAL : SOpInst<"vabal", "(>Q)(>Q)..", "csiUcUsUi", OP_ABAL>; //////////////////////////////////////////////////////////////////////////////// // E.3.6 Max/Min -def VMAX : SInst<"vmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; -def VMIN : SInst<"vmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; +def VMAX : SInst<"vmax", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; +def VMIN : SInst<"vmin", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; //////////////////////////////////////////////////////////////////////////////// // E.3.7 Pairwise Addition 
-def VPADD : IInst<"vpadd", "ddd", "csiUcUsUif">; -def VPADDL : SInst<"vpaddl", "nd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VPADAL : SInst<"vpadal", "nnd", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VPADD : IInst<"vpadd", "...", "csiUcUsUif">; +def VPADDL : SInst<"vpaddl", ">.", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VPADAL : SInst<"vpadal", ">>.", "csiUcUsUiQcQsQiQUcQUsQUi">; //////////////////////////////////////////////////////////////////////////////// // E.3.8-9 Folding Max/Min -def VPMAX : SInst<"vpmax", "ddd", "csiUcUsUif">; -def VPMIN : SInst<"vpmin", "ddd", "csiUcUsUif">; +def VPMAX : SInst<"vpmax", "...", "csiUcUsUif">; +def VPMIN : SInst<"vpmin", "...", "csiUcUsUif">; //////////////////////////////////////////////////////////////////////////////// // E.3.10 Reciprocal/Sqrt -def VRECPS : IInst<"vrecps", "ddd", "fQf">; -def VRSQRTS : IInst<"vrsqrts", "ddd", "fQf">; +def VRECPS : IInst<"vrecps", "...", "fQf">; +def VRSQRTS : IInst<"vrsqrts", "...", "fQf">; //////////////////////////////////////////////////////////////////////////////// // E.3.11 Shifts by signed variable -def VSHL : SInst<"vshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQSHL : SInst<"vqshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VRSHL : SInst<"vrshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQRSHL : SInst<"vqrshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VSHL : SInst<"vshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VQSHL : SInst<"vqshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VRSHL : SInst<"vrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VQRSHL : SInst<"vqrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; //////////////////////////////////////////////////////////////////////////////// // E.3.12 Shifts by constant let isShift = 1 in { -def VSHR_N : SInst<"vshr_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VSHL_N : IInst<"vshl_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VRSHR_N : SInst<"vrshr_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VSRA_N : SInst<"vsra_n", "dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VRSRA_N : SInst<"vrsra_n", "dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQSHL_N : SInst<"vqshl_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQSHLU_N : SInst<"vqshlu_n", "udi", "csilQcQsQiQl">; -def VSHRN_N : IInst<"vshrn_n", "hki", "silUsUiUl">; -def VQSHRUN_N : SInst<"vqshrun_n", "eki", "sil">; -def VQRSHRUN_N : SInst<"vqrshrun_n", "eki", "sil">; -def VQSHRN_N : SInst<"vqshrn_n", "hki", "silUsUiUl">; -def VRSHRN_N : IInst<"vrshrn_n", "hki", "silUsUiUl">; -def VQRSHRN_N : SInst<"vqrshrn_n", "hki", "silUsUiUl">; -def VSHLL_N : SInst<"vshll_n", "wdi", "csiUcUsUi">; +def VSHR_N : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VSHL_N : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VRSHR_N : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VSRA_N : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VRSRA_N : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VQSHL_N : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VQSHLU_N : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl">; +def VSHRN_N : IInst<"vshrn_n", "; +def VQSHRUN_N : SInst<"vqshrun_n", "(; +def VQRSHRUN_N : SInst<"vqrshrun_n", "(; +def VQSHRN_N : SInst<"vqshrn_n", "; +def VRSHRN_N : IInst<"vrshrn_n", "; +def VQRSHRN_N : SInst<"vqrshrn_n", "; +def VSHLL_N : SInst<"vshll_n", "(>Q).I", "csiUcUsUi">; //////////////////////////////////////////////////////////////////////////////// // 
E.3.13 Shifts with insert -def VSRI_N : WInst<"vsri_n", "dddi", +def VSRI_N : WInst<"vsri_n", "...I", "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">; -def VSLI_N : WInst<"vsli_n", "dddi", +def VSLI_N : WInst<"vsli_n", "...I", "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">; } //////////////////////////////////////////////////////////////////////////////// // E.3.14 Loads and stores of a single vector -def VLD1 : WInst<"vld1", "dc", +def VLD1 : WInst<"vld1", ".(c*!)", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD1_X2 : WInst<"vld1_x2", "2c", +def VLD1_X2 : WInst<"vld1_x2", "2(c*!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VLD1_X3 : WInst<"vld1_x3", "3c", +def VLD1_X3 : WInst<"vld1_x3", "3(c*!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VLD1_X4 : WInst<"vld1_x4", "4c", +def VLD1_X4 : WInst<"vld1_x4", "4(c*!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VLD1_LANE : WInst<"vld1_lane", "dcdi", +def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD1_DUP : WInst<"vld1_dup", "dc", +def VLD1_DUP : WInst<"vld1_dup", ".(c*!)", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST1 : WInst<"vst1", "vpd", +def VST1 : WInst<"vst1", "v*(.!)", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST1_X2 : WInst<"vst1_x2", "vp2", +def VST1_X2 : WInst<"vst1_x2", "v*(2!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VST1_X3 : WInst<"vst1_x3", "vp3", +def VST1_X3 : WInst<"vst1_x3", "v*(3!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VST1_X4 : WInst<"vst1_x4", "vp4", +def VST1_X4 : WInst<"vst1_x4", "v*(4!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VST1_LANE : WInst<"vst1_lane", "vpdi", +def VST1_LANE : WInst<"vst1_lane", "v*(.!)I", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; let ArchGuard = "(__ARM_FP & 2)" in { -def VLD1_F16 : WInst<"vld1", "dc", "hQh">; -def VLD1_X2_F16 : WInst<"vld1_x2", "2c", "hQh">; -def VLD1_X3_F16 : WInst<"vld1_x3", "3c", "hQh">; -def VLD1_X4_F16 : WInst<"vld1_x4", "4c", "hQh">; -def VLD1_LANE_F16 : WInst<"vld1_lane", "dcdi", "hQh">; -def VLD1_DUP_F16 : WInst<"vld1_dup", "dc", "hQh">; -def VST1_F16 : WInst<"vst1", "vpd", "hQh">; -def VST1_X2_F16 : WInst<"vst1_x2", "vp2", "hQh">; -def VST1_X3_F16 : WInst<"vst1_x3", "vp3", "hQh">; -def VST1_X4_F16 : WInst<"vst1_x4", "vp4", "hQh">; -def VST1_LANE_F16 : WInst<"vst1_lane", "vpdi", "hQh">; +def VLD1_F16 : WInst<"vld1", ".(c*!)", "hQh">; +def VLD1_X2_F16 : WInst<"vld1_x2", "2(c*!)", "hQh">; +def VLD1_X3_F16 : WInst<"vld1_x3", "3(c*!)", "hQh">; +def VLD1_X4_F16 : WInst<"vld1_x4", "4(c*!)", "hQh">; +def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh">; +def VLD1_DUP_F16 : WInst<"vld1_dup", ".(c*!)", "hQh">; +def VST1_F16 : WInst<"vst1", "v*(.!)", "hQh">; +def VST1_X2_F16 : WInst<"vst1_x2", "v*(2!)", "hQh">; +def VST1_X3_F16 : WInst<"vst1_x3", "v*(3!)", "hQh">; +def VST1_X4_F16 : WInst<"vst1_x4", "v*(4!)", "hQh">; +def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh">; } //////////////////////////////////////////////////////////////////////////////// // E.3.15 Loads and stores of an N-element structure -def VLD2 : WInst<"vld2", "2c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD3 : WInst<"vld3", "3c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD4 : WInst<"vld4", "4c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD2_DUP : WInst<"vld2_dup", "2c", +def VLD2 : WInst<"vld2", "2(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VLD3 : WInst<"vld3", "3(c*!)", 
"QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VLD4 : WInst<"vld4", "4(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VLD2_DUP : WInst<"vld2_dup", "2(c*!)", "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; -def VLD3_DUP : WInst<"vld3_dup", "3c", +def VLD3_DUP : WInst<"vld3_dup", "3(c*!)", "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; -def VLD4_DUP : WInst<"vld4_dup", "4c", +def VLD4_DUP : WInst<"vld4_dup", "4(c*!)", "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; -def VLD2_LANE : WInst<"vld2_lane", "2c2i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VLD3_LANE : WInst<"vld3_lane", "3c3i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VLD4_LANE : WInst<"vld4_lane", "4c4i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VST2 : WInst<"vst2", "vp2", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST3 : WInst<"vst3", "vp3", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST4 : WInst<"vst4", "vp4", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST2_LANE : WInst<"vst2_lane", "vp2i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VST3_LANE : WInst<"vst3_lane", "vp3i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VST4_LANE : WInst<"vst4_lane", "vp4i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; let ArchGuard = "(__ARM_FP & 2)" in { -def VLD2_F16 : WInst<"vld2", "2c", "hQh">; -def VLD3_F16 : WInst<"vld3", "3c", "hQh">; -def VLD4_F16 : WInst<"vld4", "4c", "hQh">; -def VLD2_DUP_F16 : WInst<"vld2_dup", "2c", "hQh">; -def VLD3_DUP_F16 : WInst<"vld3_dup", "3c", "hQh">; -def VLD4_DUP_F16 : WInst<"vld4_dup", "4c", "hQh">; -def VLD2_LANE_F16 : WInst<"vld2_lane", "2c2i", "hQh">; -def VLD3_LANE_F16 : WInst<"vld3_lane", "3c3i", "hQh">; -def VLD4_LANE_F16 : WInst<"vld4_lane", "4c4i", "hQh">; -def VST2_F16 : WInst<"vst2", "vp2", "hQh">; -def VST3_F16 : WInst<"vst3", "vp3", "hQh">; -def VST4_F16 : WInst<"vst4", "vp4", "hQh">; -def VST2_LANE_F16 : WInst<"vst2_lane", "vp2i", "hQh">; -def VST3_LANE_F16 : WInst<"vst3_lane", "vp3i", "hQh">; -def VST4_LANE_F16 : WInst<"vst4_lane", "vp4i", "hQh">; +def VLD2_F16 : WInst<"vld2", "2(c*!)", "hQh">; +def VLD3_F16 : WInst<"vld3", "3(c*!)", "hQh">; +def VLD4_F16 : WInst<"vld4", "4(c*!)", "hQh">; +def VLD2_DUP_F16 : WInst<"vld2_dup", "2(c*!)", "hQh">; +def VLD3_DUP_F16 : WInst<"vld3_dup", "3(c*!)", "hQh">; +def VLD4_DUP_F16 : WInst<"vld4_dup", "4(c*!)", "hQh">; +def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh">; +def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh">; +def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh">; +def VST2_F16 : WInst<"vst2", "v*(2!)", "hQh">; +def VST3_F16 : WInst<"vst3", "v*(3!)", "hQh">; +def VST4_F16 : WInst<"vst4", "v*(4!)", "hQh">; +def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh">; +def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh">; +def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh">; } 
//////////////////////////////////////////////////////////////////////////////// // E.3.16 Extract lanes from a vector let InstName = "vmov" in -def VGET_LANE : IInst<"vget_lane", "sdi", +def VGET_LANE : IInst<"vget_lane", "1.I", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; //////////////////////////////////////////////////////////////////////////////// // E.3.17 Set lanes within a vector let InstName = "vmov" in -def VSET_LANE : IInst<"vset_lane", "dsdi", +def VSET_LANE : IInst<"vset_lane", ".1.I", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; //////////////////////////////////////////////////////////////////////////////// // E.3.18 Initialize a vector from bit pattern -def VCREATE : NoTestOpInst<"vcreate", "dl", "csihfUcUsUiUlPcPsl", OP_CAST> { +def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPsl", OP_CAST> { let BigEndianSafe = 1; } //////////////////////////////////////////////////////////////////////////////// // E.3.19 Set all lanes to same value let InstName = "vmov" in { -def VDUP_N : WOpInst<"vdup_n", "ds", +def VDUP_N : WOpInst<"vdup_n", ".1", "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", OP_DUP>; -def VMOV_N : WOpInst<"vmov_n", "ds", +def VMOV_N : WOpInst<"vmov_n", ".1", "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", OP_DUP>; } let InstName = "" in -def VDUP_LANE: WOpInst<"vdup_lane", "dgi", +def VDUP_LANE: WOpInst<"vdup_lane", ".qI", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP_LN>; //////////////////////////////////////////////////////////////////////////////// // E.3.20 Combining vectors -def VCOMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfUcUsUiUlPcPs", OP_CONC>; +def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPs", OP_CONC>; //////////////////////////////////////////////////////////////////////////////// // E.3.21 Splitting vectors @@ -468,127 +469,127 @@ def VCOMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfUcUsUiUlPcPs", OP_CONC>; // versions of these intrinsics in both AArch32 and AArch64 architectures. See // D45668 for more details. 
let InstName = "vmov" in { -def VGET_HIGH : NoTestOpInst<"vget_high", "dk", "csilhfUcUsUiUlPcPs", OP_HI>; -def VGET_LOW : NoTestOpInst<"vget_low", "dk", "csilhfUcUsUiUlPcPs", OP_LO>; +def VGET_HIGH : NoTestOpInst<"vget_high", ".Q", "csilhfUcUsUiUlPcPs", OP_HI>; +def VGET_LOW : NoTestOpInst<"vget_low", ".Q", "csilhfUcUsUiUlPcPs", OP_LO>; } //////////////////////////////////////////////////////////////////////////////// // E.3.22 Converting vectors let ArchGuard = "(__ARM_FP & 2)" in { - def VCVT_F16_F32 : SInst<"vcvt_f16_f32", "md", "Hf">; - def VCVT_F32_F16 : SInst<"vcvt_f32_f16", "wd", "h">; + def VCVT_F16_F32 : SInst<"vcvt_f16_f32", "(; + def VCVT_F32_F16 : SInst<"vcvt_f32_f16", "(>Q)(.!)", "h">; } -def VCVT_S32 : SInst<"vcvt_s32", "xd", "fQf">; -def VCVT_U32 : SInst<"vcvt_u32", "ud", "fQf">; -def VCVT_F32 : SInst<"vcvt_f32", "fd", "iUiQiQUi">; +def VCVT_S32 : SInst<"vcvt_s32", "S.", "fQf">; +def VCVT_U32 : SInst<"vcvt_u32", "U.", "fQf">; +def VCVT_F32 : SInst<"vcvt_f32", "F(.!)", "iUiQiQUi">; let isVCVT_N = 1 in { -def VCVT_N_S32 : SInst<"vcvt_n_s32", "xdi", "fQf">; -def VCVT_N_U32 : SInst<"vcvt_n_u32", "udi", "fQf">; -def VCVT_N_F32 : SInst<"vcvt_n_f32", "fdi", "iUiQiQUi">; +def VCVT_N_S32 : SInst<"vcvt_n_s32", "S.I", "fQf">; +def VCVT_N_U32 : SInst<"vcvt_n_u32", "U.I", "fQf">; +def VCVT_N_F32 : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi">; } -def VMOVN : IInst<"vmovn", "hk", "silUsUiUl">; -def VMOVL : SInst<"vmovl", "wd", "csiUcUsUi">; -def VQMOVN : SInst<"vqmovn", "hk", "silUsUiUl">; -def VQMOVUN : SInst<"vqmovun", "ek", "sil">; +def VMOVN : IInst<"vmovn", "; +def VMOVL : SInst<"vmovl", "(>Q).", "csiUcUsUi">; +def VQMOVN : SInst<"vqmovn", "; +def VQMOVUN : SInst<"vqmovun", "(; //////////////////////////////////////////////////////////////////////////////// // E.3.23-24 Table lookup, Extended table lookup let InstName = "vtbl" in { -def VTBL1 : WInst<"vtbl1", "ddt", "UccPc">; -def VTBL2 : WInst<"vtbl2", "d2t", "UccPc">; -def VTBL3 : WInst<"vtbl3", "d3t", "UccPc">; -def VTBL4 : WInst<"vtbl4", "d4t", "UccPc">; +def VTBL1 : WInst<"vtbl1", "..p", "UccPc">; +def VTBL2 : WInst<"vtbl2", ".2p", "UccPc">; +def VTBL3 : WInst<"vtbl3", ".3p", "UccPc">; +def VTBL4 : WInst<"vtbl4", ".4p", "UccPc">; } let InstName = "vtbx" in { -def VTBX1 : WInst<"vtbx1", "dddt", "UccPc">; -def VTBX2 : WInst<"vtbx2", "dd2t", "UccPc">; -def VTBX3 : WInst<"vtbx3", "dd3t", "UccPc">; -def VTBX4 : WInst<"vtbx4", "dd4t", "UccPc">; +def VTBX1 : WInst<"vtbx1", "...p", "UccPc">; +def VTBX2 : WInst<"vtbx2", "..2p", "UccPc">; +def VTBX3 : WInst<"vtbx3", "..3p", "UccPc">; +def VTBX4 : WInst<"vtbx4", "..4p", "UccPc">; } //////////////////////////////////////////////////////////////////////////////// // E.3.25 Operations with a scalar value -def VMLA_LANE : IOpInst<"vmla_lane", "dddgi", +def VMLA_LANE : IOpInst<"vmla_lane", "...qI", "siUsUifQsQiQUsQUiQf", OP_MLA_LN>; -def VMLAL_LANE : SOpInst<"vmlal_lane", "wwddi", "siUsUi", OP_MLAL_LN>; -def VQDMLAL_LANE : SOpInst<"vqdmlal_lane", "wwddi", "si", OP_QDMLAL_LN>; -def VMLS_LANE : IOpInst<"vmls_lane", "dddgi", +def VMLAL_LANE : SOpInst<"vmlal_lane", "(>Q)(>Q)..I", "siUsUi", OP_MLAL_LN>; +def VQDMLAL_LANE : SOpInst<"vqdmlal_lane", "(>Q)(>Q)..I", "si", OP_QDMLAL_LN>; +def VMLS_LANE : IOpInst<"vmls_lane", "...qI", "siUsUifQsQiQUsQUiQf", OP_MLS_LN>; -def VMLSL_LANE : SOpInst<"vmlsl_lane", "wwddi", "siUsUi", OP_MLSL_LN>; -def VQDMLSL_LANE : SOpInst<"vqdmlsl_lane", "wwddi", "si", OP_QDMLSL_LN>; -def VMUL_N : IOpInst<"vmul_n", "dds", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>; -def VMUL_LANE : 
IOpInst<"vmul_lane", "ddgi", +def VMLSL_LANE : SOpInst<"vmlsl_lane", "(>Q)(>Q)..I", "siUsUi", OP_MLSL_LN>; +def VQDMLSL_LANE : SOpInst<"vqdmlsl_lane", "(>Q)(>Q)..I", "si", OP_QDMLSL_LN>; +def VMUL_N : IOpInst<"vmul_n", "..1", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>; +def VMUL_LANE : IOpInst<"vmul_lane", "..qI", "sifUsUiQsQiQfQUsQUi", OP_MUL_LN>; -def VMULL_N : SOpInst<"vmull_n", "wds", "siUsUi", OP_MULL_N>; -def VMULL_LANE : SOpInst<"vmull_lane", "wddi", "siUsUi", OP_MULL_LN>; -def VQDMULL_N : SOpInst<"vqdmull_n", "wds", "si", OP_QDMULL_N>; -def VQDMULL_LANE : SOpInst<"vqdmull_lane", "wddi", "si", OP_QDMULL_LN>; -def VQDMULH_N : SOpInst<"vqdmulh_n", "dds", "siQsQi", OP_QDMULH_N>; -def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "ddgi", "siQsQi", OP_QDMULH_LN>; -def VQRDMULH_N : SOpInst<"vqrdmulh_n", "dds", "siQsQi", OP_QRDMULH_N>; -def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ddgi", "siQsQi", OP_QRDMULH_LN>; +def VMULL_N : SOpInst<"vmull_n", "(>Q).1", "siUsUi", OP_MULL_N>; +def VMULL_LANE : SOpInst<"vmull_lane", "(>Q)..I", "siUsUi", OP_MULL_LN>; +def VQDMULL_N : SOpInst<"vqdmull_n", "(>Q).1", "si", OP_QDMULL_N>; +def VQDMULL_LANE : SOpInst<"vqdmull_lane", "(>Q)..I", "si", OP_QDMULL_LN>; +def VQDMULH_N : SOpInst<"vqdmulh_n", "..1", "siQsQi", OP_QDMULH_N>; +def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>; +def VQRDMULH_N : SOpInst<"vqrdmulh_n", "..1", "siQsQi", OP_QRDMULH_N>; +def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>; let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in { -def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "dddgi", "siQsQi", OP_QRDMLAH_LN>; -def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "dddgi", "siQsQi", OP_QRDMLSH_LN>; +def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "...qI", "siQsQi", OP_QRDMLAH_LN>; +def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "...qI", "siQsQi", OP_QRDMLSH_LN>; } -def VMLA_N : IOpInst<"vmla_n", "ddds", "siUsUifQsQiQUsQUiQf", OP_MLA_N>; -def VMLAL_N : SOpInst<"vmlal_n", "wwds", "siUsUi", OP_MLAL_N>; -def VQDMLAL_N : SOpInst<"vqdmlal_n", "wwds", "si", OP_QDMLAL_N>; -def VMLS_N : IOpInst<"vmls_n", "ddds", "siUsUifQsQiQUsQUiQf", OP_MLS_N>; -def VMLSL_N : SOpInst<"vmlsl_n", "wwds", "siUsUi", OP_MLSL_N>; -def VQDMLSL_N : SOpInst<"vqdmlsl_n", "wwds", "si", OP_QDMLSL_N>; +def VMLA_N : IOpInst<"vmla_n", "...1", "siUsUifQsQiQUsQUiQf", OP_MLA_N>; +def VMLAL_N : SOpInst<"vmlal_n", "(>Q)(>Q).1", "siUsUi", OP_MLAL_N>; +def VQDMLAL_N : SOpInst<"vqdmlal_n", "(>Q)(>Q).1", "si", OP_QDMLAL_N>; +def VMLS_N : IOpInst<"vmls_n", "...1", "siUsUifQsQiQUsQUiQf", OP_MLS_N>; +def VMLSL_N : SOpInst<"vmlsl_n", "(>Q)(>Q).1", "siUsUi", OP_MLSL_N>; +def VQDMLSL_N : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>; //////////////////////////////////////////////////////////////////////////////// // E.3.26 Vector Extract -def VEXT : WInst<"vext", "dddi", +def VEXT : WInst<"vext", "...I", "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf">; //////////////////////////////////////////////////////////////////////////////// // E.3.27 Reverse vector elements -def VREV64 : WOpInst<"vrev64", "dd", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf", +def VREV64 : WOpInst<"vrev64", "..", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf", OP_REV64>; -def VREV32 : WOpInst<"vrev32", "dd", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>; -def VREV16 : WOpInst<"vrev16", "dd", "cUcPcQcQUcQPc", OP_REV16>; +def VREV32 : WOpInst<"vrev32", "..", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>; +def VREV16 : WOpInst<"vrev16", "..", "cUcPcQcQUcQPc", OP_REV16>; 
//////////////////////////////////////////////////////////////////////////////// // E.3.28 Other single operand arithmetic -def VABS : SInst<"vabs", "dd", "csifQcQsQiQf">; -def VQABS : SInst<"vqabs", "dd", "csiQcQsQi">; -def VNEG : SOpInst<"vneg", "dd", "csifQcQsQiQf", OP_NEG>; -def VQNEG : SInst<"vqneg", "dd", "csiQcQsQi">; -def VCLS : SInst<"vcls", "dd", "csiQcQsQi">; -def VCLZ : IInst<"vclz", "dd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VCNT : WInst<"vcnt", "dd", "UccPcQUcQcQPc">; -def VRECPE : SInst<"vrecpe", "dd", "fUiQfQUi">; -def VRSQRTE : SInst<"vrsqrte", "dd", "fUiQfQUi">; +def VABS : SInst<"vabs", "..", "csifQcQsQiQf">; +def VQABS : SInst<"vqabs", "..", "csiQcQsQi">; +def VNEG : SOpInst<"vneg", "..", "csifQcQsQiQf", OP_NEG>; +def VQNEG : SInst<"vqneg", "..", "csiQcQsQi">; +def VCLS : SInst<"vcls", "..", "csiQcQsQi">; +def VCLZ : IInst<"vclz", "..", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VCNT : WInst<"vcnt", "..", "UccPcQUcQcQPc">; +def VRECPE : SInst<"vrecpe", "..", "fUiQfQUi">; +def VRSQRTE : SInst<"vrsqrte", "..", "fUiQfQUi">; //////////////////////////////////////////////////////////////////////////////// // E.3.29 Logical operations -def VMVN : LOpInst<"vmvn", "dd", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc", OP_NOT>; -def VAND : LOpInst<"vand", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_AND>; -def VORR : LOpInst<"vorr", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>; -def VEOR : LOpInst<"veor", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>; -def VBIC : LOpInst<"vbic", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; -def VORN : LOpInst<"vorn", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; +def VMVN : LOpInst<"vmvn", "..", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc", OP_NOT>; +def VAND : LOpInst<"vand", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_AND>; +def VORR : LOpInst<"vorr", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>; +def VEOR : LOpInst<"veor", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>; +def VBIC : LOpInst<"vbic", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; +def VORN : LOpInst<"vorn", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; let isHiddenLInst = 1 in -def VBSL : SInst<"vbsl", "dudd", +def VBSL : SInst<"vbsl", ".U..", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.30 Transposition operations -def VTRN : WInst<"vtrn", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; -def VZIP : WInst<"vzip", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; -def VUZP : WInst<"vuzp", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; +def VTRN : WInst<"vtrn", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; +def VZIP : WInst<"vzip", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; +def VUZP : WInst<"vuzp", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.31 Vector reinterpret cast operations def VREINTERPRET - : NoTestOpInst<"vreinterpret", "dd", + : NoTestOpInst<"vreinterpret", "..", "csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs", OP_REINT> { let CartesianProductOfTypes = 1; let ArchGuard = "!defined(__aarch64__)"; @@ -599,17 +600,17 @@ def VREINTERPRET // Vector fused multiply-add operations let ArchGuard = "defined(__ARM_FEATURE_FMA)" in { - def VFMA : SInst<"vfma", "dddd", "fQf">; - def VFMS : SOpInst<"vfms", "dddd", "fQf", OP_FMLS>; - def FMLA_N_F32 : SOpInst<"vfma_n", "ddds", "fQf", OP_FMLA_N>; + def VFMA : SInst<"vfma", "....", "fQf">; + def VFMS : SOpInst<"vfms", "....", "fQf", 
+  def FMLA_N_F32 : SOpInst<"vfma_n", "...1", "fQf", OP_FMLA_N>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // fp16 vector operations
-def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "sdi", "h", OP_SCALAR_HALF_GET_LN>;
-def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", "dsdi", "h", OP_SCALAR_HALF_SET_LN>;
-def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "sdi", "Qh", OP_SCALAR_HALF_GET_LNQ>;
-def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", "dsdi", "Qh", OP_SCALAR_HALF_SET_LNQ>;
+def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "1.I", "h", OP_SCALAR_HALF_GET_LN>;
+def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", ".1.I", "h", OP_SCALAR_HALF_SET_LN>;
+def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "1.I", "Qh", OP_SCALAR_HALF_GET_LNQ>;
+def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", ".1.I", "Qh", OP_SCALAR_HALF_SET_LNQ>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // AArch64 Intrinsics
@@ -618,474 +619,474 @@ let ArchGuard = "defined(__aarch64__)" in {
 
 ////////////////////////////////////////////////////////////////////////////////
 // Load/Store
-def LD1 : WInst<"vld1", "dc", "dQdPlQPl">;
-def LD2 : WInst<"vld2", "2c", "QUlQldQdPlQPl">;
-def LD3 : WInst<"vld3", "3c", "QUlQldQdPlQPl">;
-def LD4 : WInst<"vld4", "4c", "QUlQldQdPlQPl">;
-def ST1 : WInst<"vst1", "vpd", "dQdPlQPl">;
-def ST2 : WInst<"vst2", "vp2", "QUlQldQdPlQPl">;
-def ST3 : WInst<"vst3", "vp3", "QUlQldQdPlQPl">;
-def ST4 : WInst<"vst4", "vp4", "QUlQldQdPlQPl">;
-
-def LD1_X2 : WInst<"vld1_x2", "2c",
+def LD1 : WInst<"vld1", ".(c*!)", "dQdPlQPl">;
+def LD2 : WInst<"vld2", "2(c*!)", "QUlQldQdPlQPl">;
+def LD3 : WInst<"vld3", "3(c*!)", "QUlQldQdPlQPl">;
+def LD4 : WInst<"vld4", "4(c*!)", "QUlQldQdPlQPl">;
+def ST1 : WInst<"vst1", "v*(.!)", "dQdPlQPl">;
+def ST2 : WInst<"vst2", "v*(2!)", "QUlQldQdPlQPl">;
+def ST3 : WInst<"vst3", "v*(3!)", "QUlQldQdPlQPl">;
+def ST4 : WInst<"vst4", "v*(4!)", "QUlQldQdPlQPl">;
+
+def LD1_X2 : WInst<"vld1_x2", "2(c*!)",
              "dQdPlQPl">;
-def LD1_X3 : WInst<"vld1_x3", "3c",
+def LD1_X3 : WInst<"vld1_x3", "3(c*!)",
              "dQdPlQPl">;
-def LD1_X4 : WInst<"vld1_x4", "4c",
+def LD1_X4 : WInst<"vld1_x4", "4(c*!)",
             "dQdPlQPl">;
-def ST1_X2 : WInst<"vst1_x2", "vp2", "dQdPlQPl">;
-def ST1_X3 : WInst<"vst1_x3", "vp3", "dQdPlQPl">;
-def ST1_X4 : WInst<"vst1_x4", "vp4", "dQdPlQPl">;
+def ST1_X2 : WInst<"vst1_x2", "v*(2!)", "dQdPlQPl">;
+def ST1_X3 : WInst<"vst1_x3", "v*(3!)", "dQdPlQPl">;
+def ST1_X4 : WInst<"vst1_x4", "v*(4!)", "dQdPlQPl">;
 
-def LD1_LANE : WInst<"vld1_lane", "dcdi", "dQdPlQPl">;
-def LD2_LANE : WInst<"vld2_lane", "2c2i", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def LD3_LANE : WInst<"vld3_lane", "3c3i", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def LD4_LANE : WInst<"vld4_lane", "4c4i", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST1_LANE : WInst<"vst1_lane", "vpdi", "dQdPlQPl">;
-def ST2_LANE : WInst<"vst2_lane", "vp2i", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST3_LANE : WInst<"vst3_lane", "vp3i", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST4_LANE : WInst<"vst4_lane", "vp4i", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl">;
+def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl">;
+def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def ST3_LANE : WInst<"vst3_lane", "v*(3!)I",
+               "lUlQcQUcQPcQlQUldQdPlQPl">;
+def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
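For loads and stores, `*` makes the argument a pointer to the element type, `c` qualifies it const, `v` is a void result, `2`/`3`/`4` select the multi-vector structs, and `!` marks the key type that resolves the overloaded builtin. Two signatures the defs above should keep producing (ordinary ACLE prototypes, shown here only as a sketch):

float64x2_t vld1q_f64(float64_t const *ptr);        /* LD1: ".(c*!)" on Qd */
void vst2q_f64(float64_t *ptr, float64x2x2_t val);  /* ST2: "v*(2!)" on Qd */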
"lUlQcQUcQPcQlQUldQdPlQPl">; +def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl">; -def LD1_DUP : WInst<"vld1_dup", "dc", "dQdPlQPl">; -def LD2_DUP : WInst<"vld2_dup", "2c", "dQdPlQPl">; -def LD3_DUP : WInst<"vld3_dup", "3c", "dQdPlQPl">; -def LD4_DUP : WInst<"vld4_dup", "4c", "dQdPlQPl">; +def LD1_DUP : WInst<"vld1_dup", ".(c*!)", "dQdPlQPl">; +def LD2_DUP : WInst<"vld2_dup", "2(c*!)", "dQdPlQPl">; +def LD3_DUP : WInst<"vld3_dup", "3(c*!)", "dQdPlQPl">; +def LD4_DUP : WInst<"vld4_dup", "4(c*!)", "dQdPlQPl">; -def VLDRQ : WInst<"vldrq", "sc", "Pk">; -def VSTRQ : WInst<"vstrq", "vps", "Pk">; +def VLDRQ : WInst<"vldrq", "1(c*!)", "Pk">; +def VSTRQ : WInst<"vstrq", "v*(1!)", "Pk">; //////////////////////////////////////////////////////////////////////////////// // Addition -def ADD : IOpInst<"vadd", "ddd", "dQd", OP_ADD>; +def ADD : IOpInst<"vadd", "...", "dQd", OP_ADD>; //////////////////////////////////////////////////////////////////////////////// // Subtraction -def SUB : IOpInst<"vsub", "ddd", "dQd", OP_SUB>; +def SUB : IOpInst<"vsub", "...", "dQd", OP_SUB>; //////////////////////////////////////////////////////////////////////////////// // Multiplication -def MUL : IOpInst<"vmul", "ddd", "dQd", OP_MUL>; -def MLA : IOpInst<"vmla", "dddd", "dQd", OP_MLA>; -def MLS : IOpInst<"vmls", "dddd", "dQd", OP_MLS>; +def MUL : IOpInst<"vmul", "...", "dQd", OP_MUL>; +def MLA : IOpInst<"vmla", "....", "dQd", OP_MLA>; +def MLS : IOpInst<"vmls", "....", "dQd", OP_MLS>; //////////////////////////////////////////////////////////////////////////////// // Multiplication Extended -def MULX : SInst<"vmulx", "ddd", "fdQfQd">; +def MULX : SInst<"vmulx", "...", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// // Division -def FDIV : IOpInst<"vdiv", "ddd", "fdQfQd", OP_DIV>; +def FDIV : IOpInst<"vdiv", "...", "fdQfQd", OP_DIV>; //////////////////////////////////////////////////////////////////////////////// // Vector fused multiply-add operations -def FMLA : SInst<"vfma", "dddd", "dQd">; -def FMLS : SOpInst<"vfms", "dddd", "dQd", OP_FMLS>; +def FMLA : SInst<"vfma", "....", "dQd">; +def FMLS : SOpInst<"vfms", "....", "dQd", OP_FMLS>; //////////////////////////////////////////////////////////////////////////////// // MUL, MLA, MLS, FMA, FMS definitions with scalar argument -def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>; +def VMUL_N_A64 : IOpInst<"vmul_n", "..1", "Qd", OP_MUL_N>; -def FMLA_N : SOpInst<"vfma_n", "ddds", "dQd", OP_FMLA_N>; -def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>; +def FMLA_N : SOpInst<"vfma_n", "...1", "dQd", OP_FMLA_N>; +def FMLS_N : SOpInst<"vfms_n", "...1", "fdQfQd", OP_FMLS_N>; -def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>; -def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>; +def MLA_N : SOpInst<"vmla_n", "...1", "Qd", OP_MLA_N>; +def MLS_N : SOpInst<"vmls_n", "...1", "Qd", OP_MLS_N>; //////////////////////////////////////////////////////////////////////////////// // Logical operations -def BSL : SInst<"vbsl", "dudd", "dPlQdQPl">; +def BSL : SInst<"vbsl", ".U..", "dPlQdQPl">; //////////////////////////////////////////////////////////////////////////////// // Absolute Difference -def ABD : SInst<"vabd", "ddd", "dQd">; +def ABD : SInst<"vabd", "...", "dQd">; //////////////////////////////////////////////////////////////////////////////// // saturating absolute/negate -def ABS : SInst<"vabs", "dd", "dQdlQl">; -def QABS : SInst<"vqabs", "dd", "lQl">; -def NEG : SOpInst<"vneg", "dd", "dlQdQl", 
 
 ////////////////////////////////////////////////////////////////////////////////
 // Signed Saturating Accumulated of Unsigned Value
-def SUQADD : SInst<"vuqadd", "ddu", "csilQcQsQiQl">;
+def SUQADD : SInst<"vuqadd", "..U", "csilQcQsQiQl">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Unsigned Saturating Accumulated of Signed Value
-def USQADD : SInst<"vsqadd", "ddx", "UcUsUiUlQUcQUsQUiQUl">;
+def USQADD : SInst<"vsqadd", "..S", "UcUsUiUlQUcQUsQUiQUl">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Reciprocal/Sqrt
-def FRECPS : IInst<"vrecps", "ddd", "dQd">;
-def FRSQRTS : IInst<"vrsqrts", "ddd", "dQd">;
-def FRECPE : SInst<"vrecpe", "dd", "dQd">;
-def FRSQRTE : SInst<"vrsqrte", "dd", "dQd">;
-def FSQRT : SInst<"vsqrt", "dd", "fdQfQd">;
+def FRECPS : IInst<"vrecps", "...", "dQd">;
+def FRSQRTS : IInst<"vrsqrts", "...", "dQd">;
+def FRECPE : SInst<"vrecpe", "..", "dQd">;
+def FRSQRTE : SInst<"vrsqrte", "..", "dQd">;
+def FSQRT : SInst<"vsqrt", "..", "fdQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // bitwise reverse
-def RBIT : IInst<"vrbit", "dd", "cUcPcQcQUcQPc">;
+def RBIT : IInst<"vrbit", "..", "cUcPcQcQUcQPc">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Integer extract and narrow to high
-def XTN2 : SOpInst<"vmovn_high", "qhk", "silUsUiUl", OP_XTN>;
+def XTN2 : SOpInst<"vmovn_high", "(<Q)<Q", "silUsUiUl", OP_XTN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Signed integer saturating extract and unsigned narrow to high
-def SQXTUN2 : SOpInst<"vqmovun_high", "emd", "HsHiHl", OP_SQXTUN>;
+def SQXTUN2 : SOpInst<"vqmovun_high", "(<U)(<q).", "HsHiHl", OP_SQXTUN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Integer saturating extract and narrow to high
-def QXTN2 : SOpInst<"vqmovn_high", "qhk", "silUsUiUl", OP_QXTN>;
+def QXTN2 : SOpInst<"vqmovn_high", "(<Q)<Q", "silUsUiUl", OP_QXTN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Converting vectors
-def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "md", "Qd">;
-def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "wd", "f">;
+def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "(<q).", "Qd">;
+def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "(>Q).", "f">;
 
-def VCVT_S64 : SInst<"vcvt_s64", "xd", "dQd">;
-def VCVT_U64 : SInst<"vcvt_u64", "ud", "dQd">;
-def VCVT_F64 : SInst<"vcvt_f64", "Fd", "lUlQlQUl">;
+def VCVT_S64 : SInst<"vcvt_s64", "S.", "dQd">;
+def VCVT_U64 : SInst<"vcvt_u64", "U.", "dQd">;
+def VCVT_F64 : SInst<"vcvt_f64", "F(.!)", "lUlQlQUl">;
 
-def VCVT_HIGH_F16_F32 : SOpInst<"vcvt_high_f16", "hmj", "Hf", OP_VCVT_NA_HI_F16>;
-def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "wk", "h", OP_VCVT_EX_HI_F32>;
-def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "qfj", "d", OP_VCVT_NA_HI_F32>;
-def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI_F64>;
+def VCVT_HIGH_F16_F32 : SOpInst<"vcvt_high_f16", "<(<q)Q", "Hf", OP_VCVT_NA_HI_F16>;
+def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "(>Q)(Q!)", "h", OP_VCVT_EX_HI_F32>;
+def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "(<Q)<Q", "d", OP_VCVT_NA_HI_F32>;
+def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "(>Q)(Q!)", "f", OP_VCVT_EX_HI_F64>;
 
-def VCVTX_F32_F64 : SInst<"vcvtx_f32", "fj", "d">;
-def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "qfj", "d", OP_VCVTX_HI>;
+def VCVTX_F32_F64 : SInst<"vcvtx_f32", "(F<)(Q!)", "d">;
+def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "(<Q)<Q", "d", OP_VCVTX_HI>;
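The `_high` narrowing defs combine `<` (halve the element width, vector width unchanged) with `q`/`Q` overrides, so `(<Q)<Q` is: narrowed 128-bit result, narrowed 64-bit low half already computed, full-width 128-bit source. Assuming the prototypes reconstructed above, the generated signatures should remain, for example (sketch of standard ACLE declarations):

int8x16_t vmovn_high_s16(int8x8_t r, int16x8_t a);           /* "(<Q)<Q"  */
float16x8_t vcvt_high_f16_f32(float16x4_t r, float32x4_t a); /* "<(<q)Q"  */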
 
 ////////////////////////////////////////////////////////////////////////////////
 // Comparison
-def FCAGE : IInst<"vcage", "udd", "dQd">;
-def FCAGT : IInst<"vcagt", "udd", "dQd">;
-def FCALE : IInst<"vcale", "udd", "dQd">;
-def FCALT : IInst<"vcalt", "udd", "dQd">;
-def CMTST : WInst<"vtst", "udd", "lUlPlQlQUlQPl">;
-def CFMEQ : SOpInst<"vceq", "udd", "lUldQdQlQUlPlQPl", OP_EQ>;
-def CFMGE : SOpInst<"vcge", "udd", "lUldQdQlQUl", OP_GE>;
-def CFMLE : SOpInst<"vcle", "udd", "lUldQdQlQUl", OP_LE>;
-def CFMGT : SOpInst<"vcgt", "udd", "lUldQdQlQUl", OP_GT>;
-def CFMLT : SOpInst<"vclt", "udd", "lUldQdQlQUl", OP_LT>;
-
-def CMEQ : SInst<"vceqz", "ud",
+def FCAGE : IInst<"vcage", "U..", "dQd">;
+def FCAGT : IInst<"vcagt", "U..", "dQd">;
+def FCALE : IInst<"vcale", "U..", "dQd">;
+def FCALT : IInst<"vcalt", "U..", "dQd">;
+def CMTST : WInst<"vtst", "U..", "lUlPlQlQUlQPl">;
+def CFMEQ : SOpInst<"vceq", "U..", "lUldQdQlQUlPlQPl", OP_EQ>;
+def CFMGE : SOpInst<"vcge", "U..", "lUldQdQlQUl", OP_GE>;
+def CFMLE : SOpInst<"vcle", "U..", "lUldQdQlQUl", OP_LE>;
+def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>;
+def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>;
+
+def CMEQ : SInst<"vceqz", "U.",
           "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">;
-def CMGE : SInst<"vcgez", "ud", "csilfdQcQsQiQlQfQd">;
-def CMLE : SInst<"vclez", "ud", "csilfdQcQsQiQlQfQd">;
-def CMGT : SInst<"vcgtz", "ud", "csilfdQcQsQiQlQfQd">;
-def CMLT : SInst<"vcltz", "ud", "csilfdQcQsQiQlQfQd">;
+def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">;
+def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">;
+def CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">;
+def CMLT : SInst<"vcltz", "U.", "csilfdQcQsQiQlQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Max/Min Integer
-def MAX : SInst<"vmax", "ddd", "dQd">;
-def MIN : SInst<"vmin", "ddd", "dQd">;
+def MAX : SInst<"vmax", "...", "dQd">;
+def MIN : SInst<"vmin", "...", "dQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Pairwise Max/Min
-def MAXP : SInst<"vpmax", "ddd", "QcQsQiQUcQUsQUiQfQd">;
-def MINP : SInst<"vpmin", "ddd", "QcQsQiQUcQUsQUiQfQd">;
+def MAXP : SInst<"vpmax", "...", "QcQsQiQUcQUsQUiQfQd">;
+def MINP : SInst<"vpmin", "...", "QcQsQiQUcQUsQUiQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Pairwise MaxNum/MinNum Floating Point
-def FMAXNMP : SInst<"vpmaxnm", "ddd", "fQfQd">;
-def FMINNMP : SInst<"vpminnm", "ddd", "fQfQd">;
+def FMAXNMP : SInst<"vpmaxnm", "...", "fQfQd">;
+def FMINNMP : SInst<"vpminnm", "...", "fQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Pairwise Addition
-def ADDP : IInst<"vpadd", "ddd", "QcQsQiQlQUcQUsQUiQUlQfQd">;
+def ADDP : IInst<"vpadd", "...", "QcQsQiQlQUcQUsQUiQUlQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Shifts by constant
 let isShift = 1 in {
 // Left shift long high
-def SHLL_HIGH_N : SOpInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi",
+def SHLL_HIGH_N : SOpInst<"vshll_high_n", ">.I", "HcHsHiHUcHUsHUi",
                   OP_LONG_HI>;
 
 ////////////////////////////////////////////////////////////////////////////////
-def SRI_N : WInst<"vsri_n", "dddi", "PlQPl">;
-def SLI_N : WInst<"vsli_n", "dddi", "PlQPl">;
+def SRI_N : WInst<"vsri_n", "...I", "PlQPl">;
+def SLI_N : WInst<"vsli_n", "...I", "PlQPl">;
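`U..` on the comparison defs above keeps the long-standing rule that comparisons return an unsigned mask of the operand shape, for instance (sketch):

uint64x2_t vceqq_f64(float64x2_t a, float64x2_t b);  /* "U.." on Qd */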
 
 // Right shift narrow high
-def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "hmdi",
+def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "<(<q).I",
                   "HsHiHlHUsHUiHUl", OP_NARROW_HI>;
-def QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "hmdi",
+def QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "<(<q).I",
                   "HsHiHl", OP_NARROW_HI>;
-def RSHRN_HIGH_N : IOpInst<"vrshrn_high_n", "hmdi",
+def RSHRN_HIGH_N : IOpInst<"vrshrn_high_n", "<(<q).I",
                   "HsHiHlHUsHUiHUl", OP_NARROW_HI>;
-def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "hmdi",
+def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "<(<q).I",
                   "HsHiHl", OP_NARROW_HI>;
-def QSHRN_HIGH_N : SOpInst<"vqshrn_high_n", "hmdi",
+def QSHRN_HIGH_N : SOpInst<"vqshrn_high_n", "<(<q).I",
                   "HsHiHlHUsHUiHUl", OP_NARROW_HI>;
-def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "hmdi",
+def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "<(<q).I",
                   "HsHiHlHUsHUiHUl", OP_NARROW_HI>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // Converting vectors
-def VMOVL_HIGH : SOpInst<"vmovl_high", "nd", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
+def VMOVL_HIGH : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
 
 let isVCVT_N = 1 in {
-def CVTF_N_F64 : SInst<"vcvt_n_f64", "Fdi", "lUlQlQUl">;
-def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "xdi", "dQd">;
-def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "dQd">;
+def CVTF_N_F64 : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl">;
+def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd">;
+def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd">;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // 3VDiff class using high 64-bit in operands
-def VADDL_HIGH : SOpInst<"vaddl_high", "wkk", "csiUcUsUi", OP_ADDLHi>;
-def VADDW_HIGH : SOpInst<"vaddw_high", "wwk", "csiUcUsUi", OP_ADDWHi>;
-def VSUBL_HIGH : SOpInst<"vsubl_high", "wkk", "csiUcUsUi", OP_SUBLHi>;
-def VSUBW_HIGH : SOpInst<"vsubw_high", "wwk", "csiUcUsUi", OP_SUBWHi>;
+def VADDL_HIGH : SOpInst<"vaddl_high", "(>Q)QQ", "csiUcUsUi", OP_ADDLHi>;
+def VADDW_HIGH : SOpInst<"vaddw_high", "(>Q)(>Q)Q", "csiUcUsUi", OP_ADDWHi>;
+def VSUBL_HIGH : SOpInst<"vsubl_high", "(>Q)QQ", "csiUcUsUi", OP_SUBLHi>;
+def VSUBW_HIGH : SOpInst<"vsubw_high", "(>Q)(>Q)Q", "csiUcUsUi", OP_SUBWHi>;
 
-def VABDL_HIGH : SOpInst<"vabdl_high", "wkk", "csiUcUsUi", OP_ABDLHi>;
-def VABAL_HIGH : SOpInst<"vabal_high", "wwkk", "csiUcUsUi", OP_ABALHi>;
+def VABDL_HIGH : SOpInst<"vabdl_high", "(>Q)QQ", "csiUcUsUi", OP_ABDLHi>;
+def VABAL_HIGH : SOpInst<"vabal_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_ABALHi>;
 
-def VMULL_HIGH : SOpInst<"vmull_high", "wkk", "csiUcUsUiPc", OP_MULLHi>;
-def VMULL_HIGH_N : SOpInst<"vmull_high_n", "wks", "siUsUi", OP_MULLHi_N>;
-def VMLAL_HIGH : SOpInst<"vmlal_high", "wwkk", "csiUcUsUi", OP_MLALHi>;
-def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "wwks", "siUsUi", OP_MLALHi_N>;
-def VMLSL_HIGH : SOpInst<"vmlsl_high", "wwkk", "csiUcUsUi", OP_MLSLHi>;
-def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "wwks", "siUsUi", OP_MLSLHi_N>;
+def VMULL_HIGH : SOpInst<"vmull_high", "(>Q)QQ", "csiUcUsUiPc", OP_MULLHi>;
+def VMULL_HIGH_N : SOpInst<"vmull_high_n", "(>Q)Q1", "siUsUi", OP_MULLHi_N>;
+def VMLAL_HIGH : SOpInst<"vmlal_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_MLALHi>;
+def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "(>Q)(>Q)Q1", "siUsUi", OP_MLALHi_N>;
+def VMLSL_HIGH : SOpInst<"vmlsl_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_MLSLHi>;
+def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "(>Q)(>Q)Q1", "siUsUi", OP_MLSLHi_N>;
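The same grouping conventions cover the `_high` forms: `<(<q).I` narrows into the top half of a 128-bit result, while `(>Q)QQ` produces a doubled-width 128-bit result from the top halves of two 128-bit sources. A sketch of two signatures this section should keep generating:

int8x16_t vshrn_high_n_s16(int8x8_t r, int16x8_t a, const int n); /* "<(<q).I" */
int32x4_t vmull_high_s16(int16x8_t a, int16x8_t b);               /* "(>Q)QQ"  */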
SOpInst<"vaddhn_high", "(; +def VRADDHN_HIGH : SOpInst<"vraddhn_high", "(; +def VSUBHN_HIGH : SOpInst<"vsubhn_high", "(; +def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "(; -def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>; -def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "wks", "si", OP_QDMULLHi_N>; -def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>; -def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "wwks", "si", OP_QDMLALHi_N>; -def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>; -def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "wwks", "si", OP_QDMLSLHi_N>; -def VMULL_P64 : SInst<"vmull", "rss", "Pl">; -def VMULL_HIGH_P64 : SOpInst<"vmull_high", "rdd", "HPl", OP_MULLHi_P64>; +def VQDMULL_HIGH : SOpInst<"vqdmull_high", "(>Q)QQ", "si", OP_QDMULLHi>; +def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "(>Q)Q1", "si", OP_QDMULLHi_N>; +def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "(>Q)(>Q)QQ", "si", OP_QDMLALHi>; +def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLALHi_N>; +def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "(>Q)(>Q)QQ", "si", OP_QDMLSLHi>; +def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLSLHi_N>; +def VMULL_P64 : SInst<"vmull", "(1>)11", "Pl">; +def VMULL_HIGH_P64 : SOpInst<"vmull_high", "(1>)..", "HPl", OP_MULLHi_P64>; //////////////////////////////////////////////////////////////////////////////// // Extract or insert element from vector -def GET_LANE : IInst<"vget_lane", "sdi", "dQdPlQPl">; -def SET_LANE : IInst<"vset_lane", "dsdi", "dQdPlQPl">; -def COPY_LANE : IOpInst<"vcopy_lane", "ddidi", +def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl">; +def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl">; +def COPY_LANE : IOpInst<"vcopy_lane", "..I.I", "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>; -def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi", +def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI", "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; -def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki", +def COPY_LANEQ : IOpInst<"vcopy_laneq", "..IQI", "csilPcPsPlUcUsUiUlfd", OP_COPY_LN>; -def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi", +def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I", "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; //////////////////////////////////////////////////////////////////////////////// // Set all lanes to same value -def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", "hdQhQdPlQPl", OP_DUP_LN>; -def VDUP_LANE2: WOpInst<"vdup_laneq", "dji", +def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "hdQhQdPlQPl", OP_DUP_LN>; +def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI", "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", OP_DUP_LN>; -def DUP_N : WOpInst<"vdup_n", "ds", "dQdPlQPl", OP_DUP>; -def MOV_N : WOpInst<"vmov_n", "ds", "dQdPlQPl", OP_DUP>; +def DUP_N : WOpInst<"vdup_n", ".1", "dQdPlQPl", OP_DUP>; +def MOV_N : WOpInst<"vmov_n", ".1", "dQdPlQPl", OP_DUP>; //////////////////////////////////////////////////////////////////////////////// -def COMBINE : NoTestOpInst<"vcombine", "kdd", "dPl", OP_CONC>; +def COMBINE : NoTestOpInst<"vcombine", "Q..", "dPl", OP_CONC>; //////////////////////////////////////////////////////////////////////////////// //Initialize a vector from bit pattern -def CREATE : NoTestOpInst<"vcreate", "dl", "dPl", OP_CAST> { +def CREATE : NoTestOpInst<"vcreate", ".(IU>)", "dPl", OP_CAST> { let BigEndianSafe = 1; } //////////////////////////////////////////////////////////////////////////////// -def VMLA_LANEQ : IOpInst<"vmla_laneq", "dddji", +def VMLA_LANEQ : 
IOpInst<"vmla_laneq", "...QI", "siUsUifQsQiQUsQUiQf", OP_MLA_LN>; -def VMLS_LANEQ : IOpInst<"vmls_laneq", "dddji", +def VMLS_LANEQ : IOpInst<"vmls_laneq", "...QI", "siUsUifQsQiQUsQUiQf", OP_MLS_LN>; -def VFMA_LANE : IInst<"vfma_lane", "dddgi", "fdQfQd">; -def VFMA_LANEQ : IInst<"vfma_laneq", "dddji", "fdQfQd"> { +def VFMA_LANE : IInst<"vfma_lane", "...qI", "fdQfQd">; +def VFMA_LANEQ : IInst<"vfma_laneq", "...QI", "fdQfQd"> { let isLaneQ = 1; } -def VFMS_LANE : IOpInst<"vfms_lane", "dddgi", "fdQfQd", OP_FMS_LN>; -def VFMS_LANEQ : IOpInst<"vfms_laneq", "dddji", "fdQfQd", OP_FMS_LNQ>; +def VFMS_LANE : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>; +def VFMS_LANEQ : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ>; -def VMLAL_LANEQ : SOpInst<"vmlal_laneq", "wwdki", "siUsUi", OP_MLAL_LN>; -def VMLAL_HIGH_LANE : SOpInst<"vmlal_high_lane", "wwkdi", "siUsUi", +def VMLAL_LANEQ : SOpInst<"vmlal_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLAL_LN>; +def VMLAL_HIGH_LANE : SOpInst<"vmlal_high_lane", "(>Q)(>Q)Q.I", "siUsUi", OP_MLALHi_LN>; -def VMLAL_HIGH_LANEQ : SOpInst<"vmlal_high_laneq", "wwkki", "siUsUi", +def VMLAL_HIGH_LANEQ : SOpInst<"vmlal_high_laneq", "(>Q)(>Q)QQI", "siUsUi", OP_MLALHi_LN>; -def VMLSL_LANEQ : SOpInst<"vmlsl_laneq", "wwdki", "siUsUi", OP_MLSL_LN>; -def VMLSL_HIGH_LANE : SOpInst<"vmlsl_high_lane", "wwkdi", "siUsUi", +def VMLSL_LANEQ : SOpInst<"vmlsl_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLSL_LN>; +def VMLSL_HIGH_LANE : SOpInst<"vmlsl_high_lane", "(>Q)(>Q)Q.I", "siUsUi", OP_MLSLHi_LN>; -def VMLSL_HIGH_LANEQ : SOpInst<"vmlsl_high_laneq", "wwkki", "siUsUi", +def VMLSL_HIGH_LANEQ : SOpInst<"vmlsl_high_laneq", "(>Q)(>Q)QQI", "siUsUi", OP_MLSLHi_LN>; -def VQDMLAL_LANEQ : SOpInst<"vqdmlal_laneq", "wwdki", "si", OP_QDMLAL_LN>; -def VQDMLAL_HIGH_LANE : SOpInst<"vqdmlal_high_lane", "wwkdi", "si", +def VQDMLAL_LANEQ : SOpInst<"vqdmlal_laneq", "(>Q)(>Q).QI", "si", OP_QDMLAL_LN>; +def VQDMLAL_HIGH_LANE : SOpInst<"vqdmlal_high_lane", "(>Q)(>Q)Q.I", "si", OP_QDMLALHi_LN>; -def VQDMLAL_HIGH_LANEQ : SOpInst<"vqdmlal_high_laneq", "wwkki", "si", +def VQDMLAL_HIGH_LANEQ : SOpInst<"vqdmlal_high_laneq", "(>Q)(>Q)QQI", "si", OP_QDMLALHi_LN>; -def VQDMLSL_LANEQ : SOpInst<"vqdmlsl_laneq", "wwdki", "si", OP_QDMLSL_LN>; -def VQDMLSL_HIGH_LANE : SOpInst<"vqdmlsl_high_lane", "wwkdi", "si", +def VQDMLSL_LANEQ : SOpInst<"vqdmlsl_laneq", "(>Q)(>Q).QI", "si", OP_QDMLSL_LN>; +def VQDMLSL_HIGH_LANE : SOpInst<"vqdmlsl_high_lane", "(>Q)(>Q)Q.I", "si", OP_QDMLSLHi_LN>; -def VQDMLSL_HIGH_LANEQ : SOpInst<"vqdmlsl_high_laneq", "wwkki", "si", +def VQDMLSL_HIGH_LANEQ : SOpInst<"vqdmlsl_high_laneq", "(>Q)(>Q)QQI", "si", OP_QDMLSLHi_LN>; // Newly add double parameter for vmul_lane in aarch64 // Note: d type is handled by SCALAR_VMUL_LANE -def VMUL_LANE_A64 : IOpInst<"vmul_lane", "ddgi", "Qd", OP_MUL_LN>; +def VMUL_LANE_A64 : IOpInst<"vmul_lane", "..qI", "Qd", OP_MUL_LN>; // Note: d type is handled by SCALAR_VMUL_LANEQ -def VMUL_LANEQ : IOpInst<"vmul_laneq", "ddji", +def VMUL_LANEQ : IOpInst<"vmul_laneq", "..QI", "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN>; -def VMULL_LANEQ : SOpInst<"vmull_laneq", "wdki", "siUsUi", OP_MULL_LN>; -def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "wkdi", "siUsUi", +def VMULL_LANEQ : SOpInst<"vmull_laneq", "(>Q).QI", "siUsUi", OP_MULL_LN>; +def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "(>Q)Q.I", "siUsUi", OP_MULLHi_LN>; -def VMULL_HIGH_LANEQ : SOpInst<"vmull_high_laneq", "wkki", "siUsUi", +def VMULL_HIGH_LANEQ : SOpInst<"vmull_high_laneq", "(>Q)QQI", "siUsUi", OP_MULLHi_LN>; -def VQDMULL_LANEQ : 
SOpInst<"vqdmull_laneq", "wdki", "si", OP_QDMULL_LN>; -def VQDMULL_HIGH_LANE : SOpInst<"vqdmull_high_lane", "wkdi", "si", +def VQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(>Q).QI", "si", OP_QDMULL_LN>; +def VQDMULL_HIGH_LANE : SOpInst<"vqdmull_high_lane", "(>Q)Q.I", "si", OP_QDMULLHi_LN>; -def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmull_high_laneq", "wkki", "si", +def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si", OP_QDMULLHi_LN>; -def VQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ddji", "siQsQi", OP_QDMULH_LN>; -def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ddji", "siQsQi", OP_QRDMULH_LN>; +def VQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "..QI", "siQsQi", OP_QDMULH_LN>; +def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "..QI", "siQsQi", OP_QRDMULH_LN>; let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in { -def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "dddji", "siQsQi", OP_QRDMLAH_LN>; -def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "dddji", "siQsQi", OP_QRDMLSH_LN>; +def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN>; +def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN>; } // Note: d type implemented by SCALAR_VMULX_LANE -def VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "fQfQd", OP_MULX_LN>; +def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>; // Note: d type is implemented by SCALAR_VMULX_LANEQ -def VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "fQfQd", OP_MULX_LN>; +def VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "fQfQd", OP_MULX_LN>; //////////////////////////////////////////////////////////////////////////////// // Across vectors class -def VADDLV : SInst<"vaddlv", "rd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VMAXV : SInst<"vmaxv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; -def VMINV : SInst<"vminv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; -def VADDV : SInst<"vaddv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQdQlQUl">; -def FMAXNMV : SInst<"vmaxnmv", "sd", "fQfQd">; -def FMINNMV : SInst<"vminnmv", "sd", "fQfQd">; +def VADDLV : SInst<"vaddlv", "(1>).", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VMAXV : SInst<"vmaxv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; +def VMINV : SInst<"vminv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; +def VADDV : SInst<"vaddv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQdQlQUl">; +def FMAXNMV : SInst<"vmaxnmv", "1.", "fQfQd">; +def FMINNMV : SInst<"vminnmv", "1.", "fQfQd">; //////////////////////////////////////////////////////////////////////////////// // Newly added Vector Extract for f64 -def VEXT_A64 : WInst<"vext", "dddi", "dQdPlQPl">; +def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl">; //////////////////////////////////////////////////////////////////////////////// // Crypto let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO)" in { -def AESE : SInst<"vaese", "ddd", "QUc">; -def AESD : SInst<"vaesd", "ddd", "QUc">; -def AESMC : SInst<"vaesmc", "dd", "QUc">; -def AESIMC : SInst<"vaesimc", "dd", "QUc">; - -def SHA1H : SInst<"vsha1h", "ss", "Ui">; -def SHA1SU1 : SInst<"vsha1su1", "ddd", "QUi">; -def SHA256SU0 : SInst<"vsha256su0", "ddd", "QUi">; - -def SHA1C : SInst<"vsha1c", "ddsd", "QUi">; -def SHA1P : SInst<"vsha1p", "ddsd", "QUi">; -def SHA1M : SInst<"vsha1m", "ddsd", "QUi">; -def SHA1SU0 : SInst<"vsha1su0", "dddd", "QUi">; -def SHA256H : SInst<"vsha256h", "dddd", "QUi">; -def SHA256H2 : SInst<"vsha256h2", "dddd", "QUi">; -def SHA256SU1 : SInst<"vsha256su1", "dddd", "QUi">; +def AESE : SInst<"vaese", "...", "QUc">; +def AESD : SInst<"vaesd", "...", "QUc">; +def AESMC : 
SInst<"vaesmc", "..", "QUc">; +def AESIMC : SInst<"vaesimc", "..", "QUc">; + +def SHA1H : SInst<"vsha1h", "11", "Ui">; +def SHA1SU1 : SInst<"vsha1su1", "...", "QUi">; +def SHA256SU0 : SInst<"vsha256su0", "...", "QUi">; + +def SHA1C : SInst<"vsha1c", "..1.", "QUi">; +def SHA1P : SInst<"vsha1p", "..1.", "QUi">; +def SHA1M : SInst<"vsha1m", "..1.", "QUi">; +def SHA1SU0 : SInst<"vsha1su0", "....", "QUi">; +def SHA256H : SInst<"vsha256h", "....", "QUi">; +def SHA256H2 : SInst<"vsha256h2", "....", "QUi">; +def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">; } //////////////////////////////////////////////////////////////////////////////// // Float -> Int conversions with explicit rounding mode let ArchGuard = "__ARM_ARCH >= 8" in { -def FCVTNS_S32 : SInst<"vcvtn_s32", "xd", "fQf">; -def FCVTNU_S32 : SInst<"vcvtn_u32", "ud", "fQf">; -def FCVTPS_S32 : SInst<"vcvtp_s32", "xd", "fQf">; -def FCVTPU_S32 : SInst<"vcvtp_u32", "ud", "fQf">; -def FCVTMS_S32 : SInst<"vcvtm_s32", "xd", "fQf">; -def FCVTMU_S32 : SInst<"vcvtm_u32", "ud", "fQf">; -def FCVTAS_S32 : SInst<"vcvta_s32", "xd", "fQf">; -def FCVTAU_S32 : SInst<"vcvta_u32", "ud", "fQf">; +def FCVTNS_S32 : SInst<"vcvtn_s32", "S.", "fQf">; +def FCVTNU_S32 : SInst<"vcvtn_u32", "U.", "fQf">; +def FCVTPS_S32 : SInst<"vcvtp_s32", "S.", "fQf">; +def FCVTPU_S32 : SInst<"vcvtp_u32", "U.", "fQf">; +def FCVTMS_S32 : SInst<"vcvtm_s32", "S.", "fQf">; +def FCVTMU_S32 : SInst<"vcvtm_u32", "U.", "fQf">; +def FCVTAS_S32 : SInst<"vcvta_s32", "S.", "fQf">; +def FCVTAU_S32 : SInst<"vcvta_u32", "U.", "fQf">; } let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)" in { -def FCVTNS_S64 : SInst<"vcvtn_s64", "xd", "dQd">; -def FCVTNU_S64 : SInst<"vcvtn_u64", "ud", "dQd">; -def FCVTPS_S64 : SInst<"vcvtp_s64", "xd", "dQd">; -def FCVTPU_S64 : SInst<"vcvtp_u64", "ud", "dQd">; -def FCVTMS_S64 : SInst<"vcvtm_s64", "xd", "dQd">; -def FCVTMU_S64 : SInst<"vcvtm_u64", "ud", "dQd">; -def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "dQd">; -def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "dQd">; +def FCVTNS_S64 : SInst<"vcvtn_s64", "S.", "dQd">; +def FCVTNU_S64 : SInst<"vcvtn_u64", "U.", "dQd">; +def FCVTPS_S64 : SInst<"vcvtp_s64", "S.", "dQd">; +def FCVTPU_S64 : SInst<"vcvtp_u64", "U.", "dQd">; +def FCVTMS_S64 : SInst<"vcvtm_s64", "S.", "dQd">; +def FCVTMU_S64 : SInst<"vcvtm_u64", "U.", "dQd">; +def FCVTAS_S64 : SInst<"vcvta_s64", "S.", "dQd">; +def FCVTAU_S64 : SInst<"vcvta_u64", "U.", "dQd">; } //////////////////////////////////////////////////////////////////////////////// // Round to Integral let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { -def FRINTN_S32 : SInst<"vrndn", "dd", "fQf">; -def FRINTA_S32 : SInst<"vrnda", "dd", "fQf">; -def FRINTP_S32 : SInst<"vrndp", "dd", "fQf">; -def FRINTM_S32 : SInst<"vrndm", "dd", "fQf">; -def FRINTX_S32 : SInst<"vrndx", "dd", "fQf">; -def FRINTZ_S32 : SInst<"vrnd", "dd", "fQf">; -def FRINTI_S32 : SInst<"vrndi", "dd", "fQf">; +def FRINTN_S32 : SInst<"vrndn", "..", "fQf">; +def FRINTA_S32 : SInst<"vrnda", "..", "fQf">; +def FRINTP_S32 : SInst<"vrndp", "..", "fQf">; +def FRINTM_S32 : SInst<"vrndm", "..", "fQf">; +def FRINTX_S32 : SInst<"vrndx", "..", "fQf">; +def FRINTZ_S32 : SInst<"vrnd", "..", "fQf">; +def FRINTI_S32 : SInst<"vrndi", "..", "fQf">; } let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { -def FRINTN_S64 : SInst<"vrndn", "dd", "dQd">; -def FRINTA_S64 : SInst<"vrnda", "dd", "dQd">; -def FRINTP_S64 : SInst<"vrndp", "dd", "dQd">; -def FRINTM_S64 : 
SInst<"vrndm", "dd", "dQd">; -def FRINTX_S64 : SInst<"vrndx", "dd", "dQd">; -def FRINTZ_S64 : SInst<"vrnd", "dd", "dQd">; -def FRINTI_S64 : SInst<"vrndi", "dd", "dQd">; +def FRINTN_S64 : SInst<"vrndn", "..", "dQd">; +def FRINTA_S64 : SInst<"vrnda", "..", "dQd">; +def FRINTP_S64 : SInst<"vrndp", "..", "dQd">; +def FRINTM_S64 : SInst<"vrndm", "..", "dQd">; +def FRINTX_S64 : SInst<"vrndx", "..", "dQd">; +def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">; +def FRINTI_S64 : SInst<"vrndi", "..", "dQd">; } //////////////////////////////////////////////////////////////////////////////// // MaxNum/MinNum Floating Point let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { -def FMAXNM_S32 : SInst<"vmaxnm", "ddd", "fQf">; -def FMINNM_S32 : SInst<"vminnm", "ddd", "fQf">; +def FMAXNM_S32 : SInst<"vmaxnm", "...", "fQf">; +def FMINNM_S32 : SInst<"vminnm", "...", "fQf">; } let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { -def FMAXNM_S64 : SInst<"vmaxnm", "ddd", "dQd">; -def FMINNM_S64 : SInst<"vminnm", "ddd", "dQd">; +def FMAXNM_S64 : SInst<"vmaxnm", "...", "dQd">; +def FMINNM_S64 : SInst<"vminnm", "...", "dQd">; } //////////////////////////////////////////////////////////////////////////////// // Permutation -def VTRN1 : SOpInst<"vtrn1", "ddd", +def VTRN1 : SOpInst<"vtrn1", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>; -def VZIP1 : SOpInst<"vzip1", "ddd", +def VZIP1 : SOpInst<"vzip1", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP1>; -def VUZP1 : SOpInst<"vuzp1", "ddd", +def VUZP1 : SOpInst<"vuzp1", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP1>; -def VTRN2 : SOpInst<"vtrn2", "ddd", +def VTRN2 : SOpInst<"vtrn2", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN2>; -def VZIP2 : SOpInst<"vzip2", "ddd", +def VZIP2 : SOpInst<"vzip2", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP2>; -def VUZP2 : SOpInst<"vuzp2", "ddd", +def VUZP2 : SOpInst<"vuzp2", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP2>; //////////////////////////////////////////////////////////////////////////////// // Table lookup let InstName = "vtbl" in { -def VQTBL1_A64 : WInst<"vqtbl1", "dju", "UccPcQUcQcQPc">; -def VQTBL2_A64 : WInst<"vqtbl2", "dBu", "UccPcQUcQcQPc">; -def VQTBL3_A64 : WInst<"vqtbl3", "dCu", "UccPcQUcQcQPc">; -def VQTBL4_A64 : WInst<"vqtbl4", "dDu", "UccPcQUcQcQPc">; +def VQTBL1_A64 : WInst<"vqtbl1", ".QU", "UccPcQUcQcQPc">; +def VQTBL2_A64 : WInst<"vqtbl2", ".(2Q)U", "UccPcQUcQcQPc">; +def VQTBL3_A64 : WInst<"vqtbl3", ".(3Q)U", "UccPcQUcQcQPc">; +def VQTBL4_A64 : WInst<"vqtbl4", ".(4Q)U", "UccPcQUcQcQPc">; } let InstName = "vtbx" in { -def VQTBX1_A64 : WInst<"vqtbx1", "ddju", "UccPcQUcQcQPc">; -def VQTBX2_A64 : WInst<"vqtbx2", "ddBu", "UccPcQUcQcQPc">; -def VQTBX3_A64 : WInst<"vqtbx3", "ddCu", "UccPcQUcQcQPc">; -def VQTBX4_A64 : WInst<"vqtbx4", "ddDu", "UccPcQUcQcQPc">; +def VQTBX1_A64 : WInst<"vqtbx1", "..QU", "UccPcQUcQcQPc">; +def VQTBX2_A64 : WInst<"vqtbx2", "..(2Q)U", "UccPcQUcQcQPc">; +def VQTBX3_A64 : WInst<"vqtbx3", "..(3Q)U", "UccPcQUcQcQPc">; +def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">; } //////////////////////////////////////////////////////////////////////////////// @@ -1095,7 +1096,7 @@ def VQTBX4_A64 : WInst<"vqtbx4", "ddDu", "UccPcQUcQcQPc">; // itself during generation so, unlike all other intrinsics, this one should // include *all* types, not just additional ones. 
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1095,7 +1096,7 @@ def VQTBX4_A64 : WInst<"vqtbx4", "ddDu", "UccPcQUcQcQPc">;
 // itself during generation so, unlike all other intrinsics, this one should
 // include *all* types, not just additional ones.
 def VVREINTERPRET
-  : NoTestOpInst<"vreinterpret", "dd",
+  : NoTestOpInst<"vreinterpret", "..",
       "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT> {
   let CartesianProductOfTypes = 1;
   let BigEndianSafe = 1;
@@ -1107,332 +1108,332 @@ def VVREINTERPRET
 // Scalar Arithmetic
 
 // Scalar Addition
-def SCALAR_ADD : SInst<"vadd", "sss", "SlSUl">;
+def SCALAR_ADD : SInst<"vadd", "111", "SlSUl">;
 
 // Scalar Saturating Add
-def SCALAR_QADD : SInst<"vqadd", "sss", "ScSsSiSlSUcSUsSUiSUl">;
+def SCALAR_QADD : SInst<"vqadd", "111", "ScSsSiSlSUcSUsSUiSUl">;
 
 // Scalar Subtraction
-def SCALAR_SUB : SInst<"vsub", "sss", "SlSUl">;
+def SCALAR_SUB : SInst<"vsub", "111", "SlSUl">;
 
 // Scalar Saturating Sub
-def SCALAR_QSUB : SInst<"vqsub", "sss", "ScSsSiSlSUcSUsSUiSUl">;
+def SCALAR_QSUB : SInst<"vqsub", "111", "ScSsSiSlSUcSUsSUiSUl">;
 
 let InstName = "vmov" in {
-def VGET_HIGH_A64 : NoTestOpInst<"vget_high", "dk", "dPl", OP_HI>;
-def VGET_LOW_A64 : NoTestOpInst<"vget_low", "dk", "dPl", OP_LO>;
+def VGET_HIGH_A64 : NoTestOpInst<"vget_high", ".Q", "dPl", OP_HI>;
+def VGET_LOW_A64 : NoTestOpInst<"vget_low", ".Q", "dPl", OP_LO>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Shift
 // Scalar Shift Left
-def SCALAR_SHL: SInst<"vshl", "sss", "SlSUl">;
+def SCALAR_SHL: SInst<"vshl", "111", "SlSUl">;
 // Scalar Saturating Shift Left
-def SCALAR_QSHL: SInst<"vqshl", "sss", "ScSsSiSlSUcSUsSUiSUl">;
+def SCALAR_QSHL: SInst<"vqshl", "111", "ScSsSiSlSUcSUsSUiSUl">;
 // Scalar Saturating Rounding Shift Left
-def SCALAR_QRSHL: SInst<"vqrshl", "sss", "ScSsSiSlSUcSUsSUiSUl">;
+def SCALAR_QRSHL: SInst<"vqrshl", "111", "ScSsSiSlSUcSUsSUiSUl">;
 // Scalar Shift Rounding Left
-def SCALAR_RSHL: SInst<"vrshl", "sss", "SlSUl">;
+def SCALAR_RSHL: SInst<"vrshl", "111", "SlSUl">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Shift (Immediate)
 let isScalarShift = 1 in {
 // Signed/Unsigned Shift Right (Immediate)
-def SCALAR_SSHR_N: SInst<"vshr_n", "ssi", "SlSUl">;
+def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl">;
 // Signed/Unsigned Rounding Shift Right (Immediate)
-def SCALAR_SRSHR_N: SInst<"vrshr_n", "ssi", "SlSUl">;
+def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl">;
 // Signed/Unsigned Shift Right and Accumulate (Immediate)
-def SCALAR_SSRA_N: SInst<"vsra_n", "sssi", "SlSUl">;
+def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl">;
 // Signed/Unsigned Rounding Shift Right and Accumulate (Immediate)
-def SCALAR_SRSRA_N: SInst<"vrsra_n", "sssi", "SlSUl">;
+def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl">;
 // Shift Left (Immediate)
-def SCALAR_SHL_N: SInst<"vshl_n", "ssi", "SlSUl">;
+def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl">;
 // Signed/Unsigned Saturating Shift Left (Immediate)
-def SCALAR_SQSHL_N: SInst<"vqshl_n", "ssi", "ScSsSiSlSUcSUsSUiSUl">;
+def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl">;
 // Signed Saturating Shift Left Unsigned (Immediate)
-def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "ssi", "ScSsSiSl">;
+def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl">;
 // Shift Right And Insert (Immediate)
-def SCALAR_SRI_N: SInst<"vsri_n", "sssi", "SlSUl">;
+def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl">;
 // Shift Left And Insert (Immediate)
-def SCALAR_SLI_N: SInst<"vsli_n", "sssi", "SlSUl">;
+def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl">;
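With the `S`-prefixed scalar type specs, `111` is simply three scalars of the same type and `11I` a scalar plus an immediate, so the scalar defs above should keep generating prototypes such as (sketch):

int64_t vqaddd_s64(int64_t a, int64_t b);     /* "111" on Sl */
int64_t vshrd_n_s64(int64_t a, const int n);  /* "11I" on Sl */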
"SsSiSlSUsSUiSUl">; + def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">; // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate) - def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "zsi", "SsSiSlSUsSUiSUl">; + def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">; // Signed Saturating Shift Right Unsigned Narrow (Immediate) - def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "zsi", "SsSiSl">; + def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<)1I", "SsSiSl">; // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) - def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "zsi", "SsSiSl">; + def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<)1I", "SsSiSl">; } //////////////////////////////////////////////////////////////////////////////// // Scalar Signed/Unsigned Fixed-point Convert To Floating-Point (Immediate) -def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "ysi", "SiSUi">; -def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "osi", "SlSUl">; +def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi">; +def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Convert To Signed/Unsigned Fixed-point (Immediate) -def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "$si", "Sf">; -def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "bsi", "Sf">; -def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "$si", "Sd">; -def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "bsi", "Sd">; +def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf">; +def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf">; +def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd">; +def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd">; } //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Round to Integral let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { -def SCALAR_FRINTN_S32 : SInst<"vrndn", "ss", "Sf">; +def SCALAR_FRINTN_S32 : SInst<"vrndn", "11", "Sf">; } //////////////////////////////////////////////////////////////////////////////// // Scalar Reduce Pairwise Addition (Scalar and Floating Point) -def SCALAR_ADDP : SInst<"vpadd", "sd", "SfSHlSHdSHUl">; +def SCALAR_ADDP : SInst<"vpadd", "1.", "SfSHlSHdSHUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Reduce Floating Point Pairwise Max/Min -def SCALAR_FMAXP : SInst<"vpmax", "sd", "SfSQd">; +def SCALAR_FMAXP : SInst<"vpmax", "1.", "SfSQd">; -def SCALAR_FMINP : SInst<"vpmin", "sd", "SfSQd">; +def SCALAR_FMINP : SInst<"vpmin", "1.", "SfSQd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Reduce Floating Point Pairwise maxNum/minNum -def SCALAR_FMAXNMP : SInst<"vpmaxnm", "sd", "SfSQd">; -def SCALAR_FMINNMP : SInst<"vpminnm", "sd", "SfSQd">; +def SCALAR_FMAXNMP : SInst<"vpmaxnm", "1.", "SfSQd">; +def SCALAR_FMINNMP : SInst<"vpminnm", "1.", "SfSQd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Integer Saturating Doubling Multiply Half High -def SCALAR_SQDMULH : SInst<"vqdmulh", "sss", "SsSi">; +def SCALAR_SQDMULH : SInst<"vqdmulh", "111", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Scalar Integer Saturating Rounding Doubling Multiply Half High -def SCALAR_SQRDMULH : SInst<"vqrdmulh", "sss", "SsSi">; +def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">; let ArchGuard = 
"defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in { //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half -def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "ssss", "SsSi", OP_QRDMLAH>; +def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "1111", "SsSi", OP_QRDMLAH>; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half -def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "ssss", "SsSi", OP_QRDMLSH>; +def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "1111", "SsSi", OP_QRDMLSH>; } //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Multiply Extended -def SCALAR_FMULX : IInst<"vmulx", "sss", "SfSd">; +def SCALAR_FMULX : IInst<"vmulx", "111", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Step -def SCALAR_FRECPS : IInst<"vrecps", "sss", "SfSd">; +def SCALAR_FRECPS : IInst<"vrecps", "111", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Square Root Step -def SCALAR_FRSQRTS : IInst<"vrsqrts", "sss", "SfSd">; +def SCALAR_FRSQRTS : IInst<"vrsqrts", "111", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Integer Convert To Floating-point -def SCALAR_SCVTFS : SInst<"vcvt_f32", "ys", "Si">; -def SCALAR_SCVTFD : SInst<"vcvt_f64", "os", "Sl">; +def SCALAR_SCVTFS : SInst<"vcvt_f32", "(1F)(1!)", "Si">; +def SCALAR_SCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "Sl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Unsigned Integer Convert To Floating-point -def SCALAR_UCVTFS : SInst<"vcvt_f32", "ys", "SUi">; -def SCALAR_UCVTFD : SInst<"vcvt_f64", "os", "SUl">; +def SCALAR_UCVTFS : SInst<"vcvt_f32", "(1F)(1!)", "SUi">; +def SCALAR_UCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "SUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Converts -def SCALAR_FCVTXN : IInst<"vcvtx_f32", "ys", "Sd">; -def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "$s", "Sf">; -def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "bs", "Sf">; -def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "$s", "Sd">; -def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "bs", "Sd">; -def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "$s", "Sf">; -def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "bs", "Sf">; -def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "$s", "Sd">; -def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "bs", "Sd">; -def SCALAR_FCVTASS : SInst<"vcvta_s32", "$s", "Sf">; -def SCALAR_FCVTAUS : SInst<"vcvta_u32", "bs", "Sf">; -def SCALAR_FCVTASD : SInst<"vcvta_s64", "$s", "Sd">; -def SCALAR_FCVTAUD : SInst<"vcvta_u64", "bs", "Sd">; -def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "$s", "Sf">; -def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "bs", "Sf">; -def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "$s", "Sd">; -def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "bs", "Sd">; -def SCALAR_FCVTZSS : SInst<"vcvt_s32", "$s", "Sf">; -def SCALAR_FCVTZUS : SInst<"vcvt_u32", "bs", "Sf">; -def SCALAR_FCVTZSD : SInst<"vcvt_s64", "$s", "Sd">; -def SCALAR_FCVTZUD : SInst<"vcvt_u64", "bs", "Sd">; +def SCALAR_FCVTXN : IInst<"vcvtx_f32", "(1F<)(1!)", "Sd">; +def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "(1S)1", "Sf">; +def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "(1U)1", "Sf">; +def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "(1S)1", "Sd">; 
+def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "(1U)1", "Sd">;
+def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "(1S)1", "Sf">;
+def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "(1U)1", "Sf">;
+def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "(1S)1", "Sd">;
+def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "(1U)1", "Sd">;
+def SCALAR_FCVTASS : SInst<"vcvta_s32", "(1S)1", "Sf">;
+def SCALAR_FCVTAUS : SInst<"vcvta_u32", "(1U)1", "Sf">;
+def SCALAR_FCVTASD : SInst<"vcvta_s64", "(1S)1", "Sd">;
+def SCALAR_FCVTAUD : SInst<"vcvta_u64", "(1U)1", "Sd">;
+def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "(1S)1", "Sf">;
+def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "(1U)1", "Sf">;
+def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "(1S)1", "Sd">;
+def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "(1U)1", "Sd">;
+def SCALAR_FCVTZSS : SInst<"vcvt_s32", "(1S)1", "Sf">;
+def SCALAR_FCVTZUS : SInst<"vcvt_u32", "(1U)1", "Sf">;
+def SCALAR_FCVTZSD : SInst<"vcvt_s64", "(1S)1", "Sd">;
+def SCALAR_FCVTZUD : SInst<"vcvt_u64", "(1U)1", "Sd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Floating-point Reciprocal Estimate
-def SCALAR_FRECPE : IInst<"vrecpe", "ss", "SfSd">;
+def SCALAR_FRECPE : IInst<"vrecpe", "11", "SfSd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Floating-point Reciprocal Exponent
-def SCALAR_FRECPX : IInst<"vrecpx", "ss", "SfSd">;
+def SCALAR_FRECPX : IInst<"vrecpx", "11", "SfSd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Floating-point Reciprocal Square Root Estimate
-def SCALAR_FRSQRTE : IInst<"vrsqrte", "ss", "SfSd">;
+def SCALAR_FRSQRTE : IInst<"vrsqrte", "11", "SfSd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Integer Comparison
-def SCALAR_CMEQ : SInst<"vceq", "sss", "SlSUl">;
-def SCALAR_CMEQZ : SInst<"vceqz", "ss", "SlSUl">;
-def SCALAR_CMGE : SInst<"vcge", "sss", "Sl">;
-def SCALAR_CMGEZ : SInst<"vcgez", "ss", "Sl">;
-def SCALAR_CMHS : SInst<"vcge", "sss", "SUl">;
-def SCALAR_CMLE : SInst<"vcle", "sss", "SlSUl">;
-def SCALAR_CMLEZ : SInst<"vclez", "ss", "Sl">;
-def SCALAR_CMLT : SInst<"vclt", "sss", "SlSUl">;
-def SCALAR_CMLTZ : SInst<"vcltz", "ss", "Sl">;
-def SCALAR_CMGT : SInst<"vcgt", "sss", "Sl">;
-def SCALAR_CMGTZ : SInst<"vcgtz", "ss", "Sl">;
-def SCALAR_CMHI : SInst<"vcgt", "sss", "SUl">;
-def SCALAR_CMTST : SInst<"vtst", "sss", "SlSUl">;
+def SCALAR_CMEQ : SInst<"vceq", "111", "SlSUl">;
+def SCALAR_CMEQZ : SInst<"vceqz", "11", "SlSUl">;
+def SCALAR_CMGE : SInst<"vcge", "111", "Sl">;
+def SCALAR_CMGEZ : SInst<"vcgez", "11", "Sl">;
+def SCALAR_CMHS : SInst<"vcge", "111", "SUl">;
+def SCALAR_CMLE : SInst<"vcle", "111", "SlSUl">;
+def SCALAR_CMLEZ : SInst<"vclez", "11", "Sl">;
+def SCALAR_CMLT : SInst<"vclt", "111", "SlSUl">;
+def SCALAR_CMLTZ : SInst<"vcltz", "11", "Sl">;
+def SCALAR_CMGT : SInst<"vcgt", "111", "Sl">;
+def SCALAR_CMGTZ : SInst<"vcgtz", "11", "Sl">;
+def SCALAR_CMHI : SInst<"vcgt", "111", "SUl">;
+def SCALAR_CMTST : SInst<"vtst", "111", "SlSUl">;
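The scalar conversions above pair a category override with `1`, so `(1S)1`/`(1U)1` produce a signed/unsigned integer scalar from a floating-point scalar (sketch):

int32_t vcvtns_s32_f32(float32_t a);   /* "(1S)1" on Sf */
uint64_t vcvtpd_u64_f64(float64_t a);  /* "(1U)1" on Sd */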
"bs", "SfSd">; -def SCALAR_FCMLT : IInst<"vclt", "bss", "SfSd">; -def SCALAR_FCMLTZ : IInst<"vcltz", "bs", "SfSd">; +def SCALAR_FCMEQ : IInst<"vceq", "(1U)11", "SfSd">; +def SCALAR_FCMEQZ : IInst<"vceqz", "(1U)1", "SfSd">; +def SCALAR_FCMGE : IInst<"vcge", "(1U)11", "SfSd">; +def SCALAR_FCMGEZ : IInst<"vcgez", "(1U)1", "SfSd">; +def SCALAR_FCMGT : IInst<"vcgt", "(1U)11", "SfSd">; +def SCALAR_FCMGTZ : IInst<"vcgtz", "(1U)1", "SfSd">; +def SCALAR_FCMLE : IInst<"vcle", "(1U)11", "SfSd">; +def SCALAR_FCMLEZ : IInst<"vclez", "(1U)1", "SfSd">; +def SCALAR_FCMLT : IInst<"vclt", "(1U)11", "SfSd">; +def SCALAR_FCMLTZ : IInst<"vcltz", "(1U)1", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal -def SCALAR_FACGE : IInst<"vcage", "bss", "SfSd">; -def SCALAR_FACLE : IInst<"vcale", "bss", "SfSd">; +def SCALAR_FACGE : IInst<"vcage", "(1U)11", "SfSd">; +def SCALAR_FACLE : IInst<"vcale", "(1U)11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Absolute Compare Mask Greater Than -def SCALAR_FACGT : IInst<"vcagt", "bss", "SfSd">; -def SCALAR_FACLT : IInst<"vcalt", "bss", "SfSd">; +def SCALAR_FACGT : IInst<"vcagt", "(1U)11", "SfSd">; +def SCALAR_FACLT : IInst<"vcalt", "(1U)11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Absolute Value -def SCALAR_ABS : SInst<"vabs", "ss", "Sl">; +def SCALAR_ABS : SInst<"vabs", "11", "Sl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Absolute Difference -def SCALAR_ABD : IInst<"vabd", "sss", "SfSd">; +def SCALAR_ABD : IInst<"vabd", "111", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Absolute Value -def SCALAR_SQABS : SInst<"vqabs", "ss", "ScSsSiSl">; +def SCALAR_SQABS : SInst<"vqabs", "11", "ScSsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Negate -def SCALAR_NEG : SInst<"vneg", "ss", "Sl">; +def SCALAR_NEG : SInst<"vneg", "11", "Sl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Negate -def SCALAR_SQNEG : SInst<"vqneg", "ss", "ScSsSiSl">; +def SCALAR_SQNEG : SInst<"vqneg", "11", "ScSsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Accumulated of Unsigned Value -def SCALAR_SUQADD : SInst<"vuqadd", "ssb", "ScSsSiSl">; +def SCALAR_SUQADD : SInst<"vuqadd", "11(1U)", "ScSsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Unsigned Saturating Accumulated of Signed Value -def SCALAR_USQADD : SInst<"vsqadd", "ss$", "SUcSUsSUiSUl">; +def SCALAR_USQADD : SInst<"vsqadd", "11(1S)", "SUcSUsSUiSUl">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Doubling Multiply-Add Long -def SCALAR_SQDMLAL : SInst<"vqdmlal", "rrss", "SsSi">; +def SCALAR_SQDMLAL : SInst<"vqdmlal", "(1>)(1>)11", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Doubling Multiply-Subtract Long -def SCALAR_SQDMLSL : SInst<"vqdmlsl", "rrss", "SsSi">; +def SCALAR_SQDMLSL : SInst<"vqdmlsl", "(1>)(1>)11", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Doubling Multiply Long 
 
 ////////////////////////////////////////////////////////////////////////////////
 // Signed Saturating Doubling Multiply Long
-def SCALAR_SQDMULL : SInst<"vqdmull", "rss", "SsSi">;
+def SCALAR_SQDMULL : SInst<"vqdmull", "(1>)11", "SsSi">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Signed Saturating Extract Unsigned Narrow
-def SCALAR_SQXTUN : SInst<"vqmovun", "zs", "SsSiSl">;
+def SCALAR_SQXTUN : SInst<"vqmovun", "(1<)1", "SsSiSl">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Signed Saturating Extract Narrow
-def SCALAR_SQXTN : SInst<"vqmovn", "zs", "SsSiSl">;
+def SCALAR_SQXTN : SInst<"vqmovn", "(1<)1", "SsSiSl">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Unsigned Saturating Extract Narrow
-def SCALAR_UQXTN : SInst<"vqmovn", "zs", "SUsSUiSUl">;
+def SCALAR_UQXTN : SInst<"vqmovn", "(1<)1", "SUsSUiSUl">;
 
 // Scalar Floating Point multiply (scalar, by element)
-def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "ssdi", "SfSd", OP_SCALAR_MUL_LN>;
-def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "ssji", "SfSd", OP_SCALAR_MUL_LN>;
+def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "11.I", "SfSd", OP_SCALAR_MUL_LN>;
+def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "11QI", "SfSd", OP_SCALAR_MUL_LN>;
 
 // Scalar Floating Point multiply extended (scalar, by element)
-def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "ssdi", "SfSd", OP_SCALAR_MULX_LN>;
-def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "ssji", "SfSd", OP_SCALAR_MULX_LN>;
+def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "11.I", "SfSd", OP_SCALAR_MULX_LN>;
+def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_LN>;
 
-def SCALAR_VMUL_N : IInst<"vmul_n", "dds", "d">;
+def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
 
 // VMUL_LANE_A64 d type implemented using scalar mul lane
-def SCALAR_VMUL_LANE : IInst<"vmul_lane", "ddgi", "d">;
+def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d">;
 
 // VMUL_LANEQ d type implemented using scalar mul lane
-def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "ddji", "d"> {
+def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d"> {
   let isLaneQ = 1;
 }
 
 // VMULX_LANE d type implemented using scalar vmulx_lane
-def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "d", OP_SCALAR_VMULX_LN>;
+def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>;
 
 // VMULX_LANEQ d type implemented using scalar vmulx_laneq
-def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "d", OP_SCALAR_VMULX_LNQ>;
+def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ>;
 
 // Scalar Floating Point fused multiply-add (scalar, by element)
-def SCALAR_FMLA_LANE : IInst<"vfma_lane", "sssdi", "SfSd">;
-def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "sssji", "SfSd">;
+def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd">;
+def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd">;
 
 // Scalar Floating Point fused multiply-subtract (scalar, by element)
-def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "sssdi", "SfSd", OP_FMS_LN>;
-def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "sssji", "SfSd", OP_FMS_LNQ>;
+def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "111.I", "SfSd", OP_FMS_LN>;
+def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "111QI", "SfSd", OP_FMS_LNQ>;
 
 // Signed Saturating Doubling Multiply Long (scalar by element)
-def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "rsdi", "SsSi", OP_SCALAR_QDMULL_LN>;
-def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "rsji", "SsSi", OP_SCALAR_QDMULL_LN>;
+def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "(1>)1.I", "SsSi", OP_SCALAR_QDMULL_LN>;
+def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN>;
 
 // Signed Saturating Doubling Multiply-Add Long (scalar by element)
-def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "rrsdi", "SsSi">;
-def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "rrsji", "SsSi">;
+def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi">;
+def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi">;
 
 // Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
-def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "rrsdi", "SsSi">;
-def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "rrsji", "SsSi">;
+def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi">;
+def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi">;
 
 // Scalar Integer Saturating Doubling Multiply Half High (scalar by element)
-def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QDMULH_LN>;
-def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QDMULH_LN>;
+def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "11.I", "SsSi", OP_SCALAR_QDMULH_LN>;
+def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QDMULH_LN>;
 
 // Scalar Integer Saturating Rounding Doubling Multiply Half High
-def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>;
-def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LN>;
+def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "11.I", "SsSi", OP_SCALAR_QRDMULH_LN>;
+def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QRDMULH_LN>;
 
 let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
 // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
-def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLAH_LN>;
-def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLAH_LN>;
+def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "111.I", "SsSi", OP_SCALAR_QRDMLAH_LN>;
+def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN>;
 
 // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
-def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLSH_LN>;
-def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLSH_LN>;
+def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_QRDMLSH_LN>;
+def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN>;
 }
 
-def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
-def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
+def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
+def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
 }
 
 // ARMv8.2-A FP16 vector intrinsics for A32/A64.
@@ -1441,234 +1442,252 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
 
 // ARMv8.2-A FP16 one-operand vector intrinsics.
// Comparison - def CMEQH : SInst<"vceqz", "ud", "hQh">; - def CMGEH : SInst<"vcgez", "ud", "hQh">; - def CMGTH : SInst<"vcgtz", "ud", "hQh">; - def CMLEH : SInst<"vclez", "ud", "hQh">; - def CMLTH : SInst<"vcltz", "ud", "hQh">; + def CMEQH : SInst<"vceqz", "U.", "hQh">; + def CMGEH : SInst<"vcgez", "U.", "hQh">; + def CMGTH : SInst<"vcgtz", "U.", "hQh">; + def CMLEH : SInst<"vclez", "U.", "hQh">; + def CMLTH : SInst<"vcltz", "U.", "hQh">; // Vector conversion - def VCVT_F16 : SInst<"vcvt_f16", "Hd", "sUsQsQUs">; - def VCVT_S16 : SInst<"vcvt_s16", "xd", "hQh">; - def VCVT_U16 : SInst<"vcvt_u16", "ud", "hQh">; - def VCVTA_S16 : SInst<"vcvta_s16", "xd", "hQh">; - def VCVTA_U16 : SInst<"vcvta_u16", "ud", "hQh">; - def VCVTM_S16 : SInst<"vcvtm_s16", "xd", "hQh">; - def VCVTM_U16 : SInst<"vcvtm_u16", "ud", "hQh">; - def VCVTN_S16 : SInst<"vcvtn_s16", "xd", "hQh">; - def VCVTN_U16 : SInst<"vcvtn_u16", "ud", "hQh">; - def VCVTP_S16 : SInst<"vcvtp_s16", "xd", "hQh">; - def VCVTP_U16 : SInst<"vcvtp_u16", "ud", "hQh">; + def VCVT_F16 : SInst<"vcvt_f16", "F(.!)", "sUsQsQUs">; + def VCVT_S16 : SInst<"vcvt_s16", "S.", "hQh">; + def VCVT_U16 : SInst<"vcvt_u16", "U.", "hQh">; + def VCVTA_S16 : SInst<"vcvta_s16", "S.", "hQh">; + def VCVTA_U16 : SInst<"vcvta_u16", "U.", "hQh">; + def VCVTM_S16 : SInst<"vcvtm_s16", "S.", "hQh">; + def VCVTM_U16 : SInst<"vcvtm_u16", "U.", "hQh">; + def VCVTN_S16 : SInst<"vcvtn_s16", "S.", "hQh">; + def VCVTN_U16 : SInst<"vcvtn_u16", "U.", "hQh">; + def VCVTP_S16 : SInst<"vcvtp_s16", "S.", "hQh">; + def VCVTP_U16 : SInst<"vcvtp_u16", "U.", "hQh">; // Vector rounding let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { - def FRINTZH : SInst<"vrnd", "dd", "hQh">; - def FRINTNH : SInst<"vrndn", "dd", "hQh">; - def FRINTAH : SInst<"vrnda", "dd", "hQh">; - def FRINTPH : SInst<"vrndp", "dd", "hQh">; - def FRINTMH : SInst<"vrndm", "dd", "hQh">; - def FRINTXH : SInst<"vrndx", "dd", "hQh">; + def FRINTZH : SInst<"vrnd", "..", "hQh">; + def FRINTNH : SInst<"vrndn", "..", "hQh">; + def FRINTAH : SInst<"vrnda", "..", "hQh">; + def FRINTPH : SInst<"vrndp", "..", "hQh">; + def FRINTMH : SInst<"vrndm", "..", "hQh">; + def FRINTXH : SInst<"vrndx", "..", "hQh">; } // Misc. - def VABSH : SInst<"vabs", "dd", "hQh">; - def VNEGH : SOpInst<"vneg", "dd", "hQh", OP_NEG>; - def VRECPEH : SInst<"vrecpe", "dd", "hQh">; - def FRSQRTEH : SInst<"vrsqrte", "dd", "hQh">; + def VABSH : SInst<"vabs", "..", "hQh">; + def VNEGH : SOpInst<"vneg", "..", "hQh", OP_NEG>; + def VRECPEH : SInst<"vrecpe", "..", "hQh">; + def FRSQRTEH : SInst<"vrsqrte", "..", "hQh">; // ARMv8.2-A FP16 two-operands vector intrinsics. // Misc. 
- def VADDH : SOpInst<"vadd", "ddd", "hQh", OP_ADD>; - def VABDH : SInst<"vabd", "ddd", "hQh">; - def VSUBH : SOpInst<"vsub", "ddd", "hQh", OP_SUB>; + def VADDH : SOpInst<"vadd", "...", "hQh", OP_ADD>; + def VABDH : SInst<"vabd", "...", "hQh">; + def VSUBH : SOpInst<"vsub", "...", "hQh", OP_SUB>; // Comparison let InstName = "vacge" in { - def VCAGEH : SInst<"vcage", "udd", "hQh">; - def VCALEH : SInst<"vcale", "udd", "hQh">; + def VCAGEH : SInst<"vcage", "U..", "hQh">; + def VCALEH : SInst<"vcale", "U..", "hQh">; } let InstName = "vacgt" in { - def VCAGTH : SInst<"vcagt", "udd", "hQh">; - def VCALTH : SInst<"vcalt", "udd", "hQh">; + def VCAGTH : SInst<"vcagt", "U..", "hQh">; + def VCALTH : SInst<"vcalt", "U..", "hQh">; } - def VCEQH : SOpInst<"vceq", "udd", "hQh", OP_EQ>; - def VCGEH : SOpInst<"vcge", "udd", "hQh", OP_GE>; - def VCGTH : SOpInst<"vcgt", "udd", "hQh", OP_GT>; + def VCEQH : SOpInst<"vceq", "U..", "hQh", OP_EQ>; + def VCGEH : SOpInst<"vcge", "U..", "hQh", OP_GE>; + def VCGTH : SOpInst<"vcgt", "U..", "hQh", OP_GT>; let InstName = "vcge" in - def VCLEH : SOpInst<"vcle", "udd", "hQh", OP_LE>; + def VCLEH : SOpInst<"vcle", "U..", "hQh", OP_LE>; let InstName = "vcgt" in - def VCLTH : SOpInst<"vclt", "udd", "hQh", OP_LT>; + def VCLTH : SOpInst<"vclt", "U..", "hQh", OP_LT>; // Vector conversion let isVCVT_N = 1 in { - def VCVT_N_F16 : SInst<"vcvt_n_f16", "Hdi", "sUsQsQUs">; - def VCVT_N_S16 : SInst<"vcvt_n_s16", "xdi", "hQh">; - def VCVT_N_U16 : SInst<"vcvt_n_u16", "udi", "hQh">; + def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs">; + def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh">; + def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh">; } // Max/Min - def VMAXH : SInst<"vmax", "ddd", "hQh">; - def VMINH : SInst<"vmin", "ddd", "hQh">; + def VMAXH : SInst<"vmax", "...", "hQh">; + def VMINH : SInst<"vmin", "...", "hQh">; let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { - def FMAXNMH : SInst<"vmaxnm", "ddd", "hQh">; - def FMINNMH : SInst<"vminnm", "ddd", "hQh">; + def FMAXNMH : SInst<"vmaxnm", "...", "hQh">; + def FMINNMH : SInst<"vminnm", "...", "hQh">; } // Multiplication/Division - def VMULH : SOpInst<"vmul", "ddd", "hQh", OP_MUL>; + def VMULH : SOpInst<"vmul", "...", "hQh", OP_MUL>; // Pairwise addition - def VPADDH : SInst<"vpadd", "ddd", "h">; + def VPADDH : SInst<"vpadd", "...", "h">; // Pairwise Max/Min - def VPMAXH : SInst<"vpmax", "ddd", "h">; - def VPMINH : SInst<"vpmin", "ddd", "h">; + def VPMAXH : SInst<"vpmax", "...", "h">; + def VPMINH : SInst<"vpmin", "...", "h">; // Reciprocal/Sqrt - def VRECPSH : SInst<"vrecps", "ddd", "hQh">; - def VRSQRTSH : SInst<"vrsqrts", "ddd", "hQh">; + def VRECPSH : SInst<"vrecps", "...", "hQh">; + def VRSQRTSH : SInst<"vrsqrts", "...", "hQh">; // ARMv8.2-A FP16 three-operands vector intrinsics. // Vector fused multiply-add operations - def VFMAH : SInst<"vfma", "dddd", "hQh">; - def VFMSH : SOpInst<"vfms", "dddd", "hQh", OP_FMLS>; + def VFMAH : SInst<"vfma", "....", "hQh">; + def VFMSH : SOpInst<"vfms", "....", "hQh", OP_FMLS>; // ARMv8.2-A FP16 lane vector intrinsics. 
// Mul lane - def VMUL_LANEH : IOpInst<"vmul_lane", "ddgi", "hQh", OP_MUL_LN>; - def VMUL_NH : IOpInst<"vmul_n", "dds", "hQh", OP_MUL_N>; + def VMUL_LANEH : IOpInst<"vmul_lane", "..qI", "hQh", OP_MUL_LN>; + def VMUL_NH : IOpInst<"vmul_n", "..1", "hQh", OP_MUL_N>; // Data processing intrinsics - section 5 // Logical operations let isHiddenLInst = 1 in - def VBSLH : SInst<"vbsl", "dudd", "hQh">; + def VBSLH : SInst<"vbsl", ".U..", "hQh">; // Transposition operations - def VZIPH : WInst<"vzip", "2dd", "hQh">; - def VUZPH : WInst<"vuzp", "2dd", "hQh">; - def VTRNH : WInst<"vtrn", "2dd", "hQh">; + def VZIPH : WInst<"vzip", "2..", "hQh">; + def VUZPH : WInst<"vuzp", "2..", "hQh">; + def VTRNH : WInst<"vtrn", "2..", "hQh">; let ArchGuard = "!defined(__aarch64__)" in { // Set all lanes to same value. // Already implemented prior to ARMv8.2-A. - def VMOV_NH : WOpInst<"vmov_n", "ds", "hQh", OP_DUP>; - def VDUP_NH : WOpInst<"vdup_n", "ds", "hQh", OP_DUP>; - def VDUP_LANE1H : WOpInst<"vdup_lane", "dgi", "hQh", OP_DUP_LN>; + def VMOV_NH : WOpInst<"vmov_n", ".1", "hQh", OP_DUP>; + def VDUP_NH : WOpInst<"vdup_n", ".1", "hQh", OP_DUP>; + def VDUP_LANE1H : WOpInst<"vdup_lane", ".qI", "hQh", OP_DUP_LN>; } // Vector Extract - def VEXTH : WInst<"vext", "dddi", "hQh">; + def VEXTH : WInst<"vext", "...I", "hQh">; // Reverse vector elements - def VREV64H : WOpInst<"vrev64", "dd", "hQh", OP_REV64>; + def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>; } // ARMv8.2-A FP16 vector intrinsics for A64 only. let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in { // Vector rounding - def FRINTIH : SInst<"vrndi", "dd", "hQh">; + def FRINTIH : SInst<"vrndi", "..", "hQh">; // Misc. - def FSQRTH : SInst<"vsqrt", "dd", "hQh">; + def FSQRTH : SInst<"vsqrt", "..", "hQh">; // Multiplication/Division - def MULXH : SInst<"vmulx", "ddd", "hQh">; - def FDIVH : IOpInst<"vdiv", "ddd", "hQh", OP_DIV>; + def MULXH : SInst<"vmulx", "...", "hQh">; + def FDIVH : IOpInst<"vdiv", "...", "hQh", OP_DIV>; // Pairwise addition - def VPADDH1 : SInst<"vpadd", "ddd", "Qh">; + def VPADDH1 : SInst<"vpadd", "...", "Qh">; // Pairwise Max/Min - def VPMAXH1 : SInst<"vpmax", "ddd", "Qh">; - def VPMINH1 : SInst<"vpmin", "ddd", "Qh">; + def VPMAXH1 : SInst<"vpmax", "...", "Qh">; + def VPMINH1 : SInst<"vpmin", "...", "Qh">; // Pairwise MaxNum/MinNum - def FMAXNMPH : SInst<"vpmaxnm", "ddd", "hQh">; - def FMINNMPH : SInst<"vpminnm", "ddd", "hQh">; + def FMAXNMPH : SInst<"vpmaxnm", "...", "hQh">; + def FMINNMPH : SInst<"vpminnm", "...", "hQh">; // ARMv8.2-A FP16 lane vector intrinsics. 
// FMA lane - def VFMA_LANEH : IInst<"vfma_lane", "dddgi", "hQh">; - def VFMA_LANEQH : IInst<"vfma_laneq", "dddji", "hQh">; + def VFMA_LANEH : IInst<"vfma_lane", "...qI", "hQh">; + def VFMA_LANEQH : IInst<"vfma_laneq", "...QI", "hQh">; // FMA lane with scalar argument - def FMLA_NH : SOpInst<"vfma_n", "ddds", "hQh", OP_FMLA_N>; + def FMLA_NH : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>; // Scalar floating point fused multiply-add (scalar, by element) - def SCALAR_FMLA_LANEH : IInst<"vfma_lane", "sssdi", "Sh">; - def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "sssji", "Sh">; + def SCALAR_FMLA_LANEH : IInst<"vfma_lane", "111.I", "Sh">; + def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh">; // FMS lane - def VFMS_LANEH : IOpInst<"vfms_lane", "dddgi", "hQh", OP_FMS_LN>; - def VFMS_LANEQH : IOpInst<"vfms_laneq", "dddji", "hQh", OP_FMS_LNQ>; + def VFMS_LANEH : IOpInst<"vfms_lane", "...qI", "hQh", OP_FMS_LN>; + def VFMS_LANEQH : IOpInst<"vfms_laneq", "...QI", "hQh", OP_FMS_LNQ>; // FMS lane with scalar argument - def FMLS_NH : SOpInst<"vfms_n", "ddds", "hQh", OP_FMLS_N>; + def FMLS_NH : SOpInst<"vfms_n", "...1", "hQh", OP_FMLS_N>; // Scalar floating point fused multiply-subtract (scalar, by element) - def SCALAR_FMLS_LANEH : IOpInst<"vfms_lane", "sssdi", "Sh", OP_FMS_LN>; - def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "sssji", "Sh", OP_FMS_LNQ>; + def SCALAR_FMLS_LANEH : IOpInst<"vfms_lane", "111.I", "Sh", OP_FMS_LN>; + def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "111QI", "Sh", OP_FMS_LNQ>; // Mul lane - def VMUL_LANEQH : IOpInst<"vmul_laneq", "ddji", "hQh", OP_MUL_LN>; + def VMUL_LANEQH : IOpInst<"vmul_laneq", "..QI", "hQh", OP_MUL_LN>; // Scalar floating point multiply (scalar, by element) - def SCALAR_FMUL_LANEH : IOpInst<"vmul_lane", "ssdi", "Sh", OP_SCALAR_MUL_LN>; - def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "ssji", "Sh", OP_SCALAR_MUL_LN>; + def SCALAR_FMUL_LANEH : IOpInst<"vmul_lane", "11.I", "Sh", OP_SCALAR_MUL_LN>; + def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "11QI", "Sh", OP_SCALAR_MUL_LN>; // Mulx lane - def VMULX_LANEH : IOpInst<"vmulx_lane", "ddgi", "hQh", OP_MULX_LN>; - def VMULX_LANEQH : IOpInst<"vmulx_laneq", "ddji", "hQh", OP_MULX_LN>; - def VMULX_NH : IOpInst<"vmulx_n", "dds", "hQh", OP_MULX_N>; + def VMULX_LANEH : IOpInst<"vmulx_lane", "..qI", "hQh", OP_MULX_LN>; + def VMULX_LANEQH : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN>; + def VMULX_NH : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>; // Scalar floating point mulx (scalar, by element) - def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "ssdi", "Sh">; - def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "ssji", "Sh">; + def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh">; + def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh">; // ARMv8.2-A FP16 reduction vector intrinsics. 
- def VMAXVH : SInst<"vmaxv", "sd", "hQh">; - def VMINVH : SInst<"vminv", "sd", "hQh">; - def FMAXNMVH : SInst<"vmaxnmv", "sd", "hQh">; - def FMINNMVH : SInst<"vminnmv", "sd", "hQh">; + def VMAXVH : SInst<"vmaxv", "1.", "hQh">; + def VMINVH : SInst<"vminv", "1.", "hQh">; + def FMAXNMVH : SInst<"vmaxnmv", "1.", "hQh">; + def FMINNMVH : SInst<"vminnmv", "1.", "hQh">; // Permutation - def VTRN1H : SOpInst<"vtrn1", "ddd", "hQh", OP_TRN1>; - def VZIP1H : SOpInst<"vzip1", "ddd", "hQh", OP_ZIP1>; - def VUZP1H : SOpInst<"vuzp1", "ddd", "hQh", OP_UZP1>; - def VTRN2H : SOpInst<"vtrn2", "ddd", "hQh", OP_TRN2>; - def VZIP2H : SOpInst<"vzip2", "ddd", "hQh", OP_ZIP2>; - def VUZP2H : SOpInst<"vuzp2", "ddd", "hQh", OP_UZP2>; - - def SCALAR_VDUP_LANEH : IInst<"vdup_lane", "sdi", "Sh">; - def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "sji", "Sh">; + def VTRN1H : SOpInst<"vtrn1", "...", "hQh", OP_TRN1>; + def VZIP1H : SOpInst<"vzip1", "...", "hQh", OP_ZIP1>; + def VUZP1H : SOpInst<"vuzp1", "...", "hQh", OP_UZP1>; + def VTRN2H : SOpInst<"vtrn2", "...", "hQh", OP_TRN2>; + def VZIP2H : SOpInst<"vzip2", "...", "hQh", OP_ZIP2>; + def VUZP2H : SOpInst<"vuzp2", "...", "hQh", OP_UZP2>; + + def SCALAR_VDUP_LANEH : IInst<"vdup_lane", "1.I", "Sh">; + def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh">; } // v8.2-A dot product instructions. let ArchGuard = "defined(__ARM_FEATURE_DOTPROD)" in { - def DOT : SInst<"vdot", "dd88", "iQiUiQUi">; - def DOT_LANE : SOpInst<"vdot_lane", "dd87i", "iUiQiQUi", OP_DOT_LN>; + def DOT : SInst<"vdot", "..(<<)(<<)", "iQiUiQUi">; + def DOT_LANE : SOpInst<"vdot_lane", "..(<<)(<<q)I", "iUiQiQUi", OP_DOT_LN>; } let ArchGuard = "defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)" in { // Variants indexing into a 128-bit vector are A64 only. - def UDOT_LANEQ : SOpInst<"vdot_laneq", "dd89i", "iUiQiQUi", OP_DOT_LNQ>; + def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ>; } // v8.2-A FP16 fused multiply-add long instructions. 
let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in { - def VFMLAL_LOW : SInst<"vfmlal_low", "nndd", "hQh">; - def VFMLSL_LOW : SInst<"vfmlsl_low", "nndd", "hQh">; - def VFMLAL_HIGH : SInst<"vfmlal_high", "nndd", "hQh">; - def VFMLSL_HIGH : SInst<"vfmlsl_high", "nndd", "hQh">; - - def VFMLAL_LANE_LOW : SOpInst<"vfmlal_lane_low", "ffH0i", "hQh", OP_FMLAL_LN>; - def VFMLSL_LANE_LOW : SOpInst<"vfmlsl_lane_low", "ffH0i", "hQh", OP_FMLSL_LN>; - def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "ffH0i", "hQh", OP_FMLAL_LN_Hi>; - def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "ffH0i", "hQh", OP_FMLSL_LN_Hi>; - - def VFMLAL_LANEQ_LOW : SOpInst<"vfmlal_laneq_low", "ffH1i", "hQh", OP_FMLAL_LN>; - def VFMLSL_LANEQ_LOW : SOpInst<"vfmlsl_laneq_low", "ffH1i", "hQh", OP_FMLSL_LN>; - def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "ffH1i", "hQh", OP_FMLAL_LN_Hi>; - def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "ffH1i", "hQh", OP_FMLSL_LN_Hi>; + def VFMLAL_LOW : SInst<"vfmlal_low", ">>..", "hQh">; + def VFMLSL_LOW : SInst<"vfmlsl_low", ">>..", "hQh">; + def VFMLAL_HIGH : SInst<"vfmlal_high", ">>..", "hQh">; + def VFMLSL_HIGH : SInst<"vfmlsl_high", ">>..", "hQh">; + + def VFMLAL_LANE_LOW : SOpInst<"vfmlal_lane_low", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN>; + def VFMLSL_LANE_LOW : SOpInst<"vfmlsl_lane_low", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN>; + def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN_Hi>; + def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN_Hi>; + + def VFMLAL_LANEQ_LOW : SOpInst<"vfmlal_laneq_low", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN>; + def VFMLSL_LANEQ_LOW : SOpInst<"vfmlsl_laneq_low", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN>; + def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi>; + def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>; } + +// v8.3-A Vector complex addition intrinsics +let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { + def VCADD_ROT90_FP16 : SInst<"vcadd_rot90", "...", "h">; + def VCADD_ROT270_FP16 : SInst<"vcadd_rot270", "...", "h">; + def VCADDQ_ROT90_FP16 : SInst<"vcaddq_rot90", "QQQ", "h">; + def VCADDQ_ROT270_FP16 : SInst<"vcaddq_rot270", "QQQ", "h">; +} +let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)" in { + def VCADD_ROT90 : SInst<"vcadd_rot90", "...", "f">; + def VCADD_ROT270 : SInst<"vcadd_rot270", "...", "f">; + def VCADDQ_ROT90 : SInst<"vcaddq_rot90", "QQQ", "f">; + def VCADDQ_ROT270 : SInst<"vcaddq_rot270", "QQQ", "f">; +} +let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__)" in { + def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">; + def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">; +} \ No newline at end of file diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td index 984ed787037f6..28b00d162a00d 100644 --- a/clang/include/clang/Basic/arm_neon_incl.td +++ b/clang/include/clang/Basic/arm_neon_incl.td @@ -198,10 +198,8 @@ def OP_UNAVAILABLE : Operation { // // The prototype is a string that defines the return type of the intrinsic // and the type of each argument. The return type and every argument gets a -// "modifier" that can change in some way the "base type" of the intrinsic. -// -// The modifier 'd' means "default" and does not modify the base type in any -// way. The available modifiers are given below. 
+// set of "modifiers" that can change in some way the "base type" of the +// intrinsic. // // Typespecs // --------- @@ -226,41 +224,34 @@ def OP_UNAVAILABLE : Operation { // ------------------- // prototype: return (arg, arg, ...) // -// v: void -// t: best-fit integer (int/poly args) -// x: signed integer (int/float args) -// u: unsigned integer (int/float args) -// f: float (int args) -// F: double (int args) -// H: half (int args) -// 0: half (int args), ignore 'Q' size modifier. -// 1: half (int args), force 'Q' size modifier. -// d: default -// g: default, ignore 'Q' size modifier. -// j: default, force 'Q' size modifier. -// w: double width elements, same num elts -// n: double width elements, half num elts -// h: half width elements, double num elts -// q: half width elements, quad num elts -// e: half width elements, double num elts, unsigned -// m: half width elements, same num elts -// i: constant int -// l: constant uint64 -// s: scalar of element type -// z: scalar of half width element type, signed -// r: scalar of double width element type, signed -// b: scalar of unsigned integer/long type (int/float args) -// $: scalar of signed integer/long type (int/float args) -// y: scalar of float -// o: scalar of double -// k: default elt width, double num elts -// 2,3,4: array of default vectors -// B,C,D: array of default elts, force 'Q' size modifier. -// p: pointer type -// c: const pointer type -// 7: vector of 8-bit elements, ignore 'Q' size modifier -// 8: vector of 8-bit elements, same width as default type -// 9: vector of 8-bit elements, force 'Q' size modifier +// Each type modifier is either a single character, or a group surrounded by +// parentheses. +// +// .: default +// v: change to void category. +// S: change to signed integer category. +// U: change to unsigned integer category. +// F: change to floating category. +// P: change to polynomial category. +// p: change polynomial to equivalent integer category. Otherwise nop. +// +// >: double element width (vector size unchanged). +// <: half element width (vector size unchanged). +// +// 1: change to scalar. +// 2: change to struct of two vectors. +// 3: change to struct of three vectors. +// 4: change to struct of four vectors. +// +// *: make a pointer argument. +// c: make a constant argument (for pointers). +// +// Q: force 128-bit width. +// q: force 64-bit width. +// +// I: make 32-bit signed scalar immediate +// !: make this the key type passed to CGBuiltin.cpp in a polymorphic call. + // Every intrinsic subclasses Inst. class Inst <string n, string p, string t, Operation o> { diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index da8f819dee964..d382cf77a8b22 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -9,6 +9,7 @@ #ifndef LLVM_CLANG_DRIVER_DISTRO_H #define LLVM_CLANG_DRIVER_DISTRO_H +#include "llvm/ADT/Triple.h" #include "llvm/Support/VirtualFileSystem.h" namespace clang { @@ -84,7 +85,7 @@ class Distro { Distro(DistroType D) : DistroVal(D) {} /// Detects the distribution using specified VFS. 
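As a worked illustration of the new prototype scheme documented above: modifiers are read left to right, one character or parenthesized group per position with the return type first, and each one rewrites the base type named by the typespec. A minimal sketch, decoding two prototypes from the hunks above into the C declarations they should produce (assuming the usual arm_neon.h vector typedefs):

// "(1>)11" on typespec "Ss" (scalar int16): the return type is a scalar ('1')
// of doubled element width ('>'); both arguments are plain scalars ('1').
int32_t vqdmullh_s16(int16_t a, int16_t b);

// "..(<<)(<<q)I" on typespec "i" (int32x2_t): '.' keeps the base type, '<<'
// halves the element width twice (int32 -> int8, vector size unchanged),
// 'q' forces the 64-bit form, and 'I' is a constant immediate (the lane).
int32x2_t vdot_lane_s32(int32x2_t r, int8x8_t a, int8x8_t b, const int lane);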
- explicit Distro(llvm::vfs::FileSystem &VFS); + explicit Distro(llvm::vfs::FileSystem &VFS, const llvm::Triple &TargetOrHost); bool operator==(const Distro &Other) const { return DistroVal == Other.DistroVal; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 9ae5fb54e1cfa..1b7ddb501f150 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -872,8 +872,6 @@ def fconstexpr_depth_EQ : Joined<["-"], "fconstexpr-depth=">, Group; def fconstexpr_steps_EQ : Joined<["-"], "fconstexpr-steps=">, Group; def fexperimental_new_constant_interpreter : Flag<["-"], "fexperimental-new-constant-interpreter">, Group, HelpText<"Enable the experimental new constant interpreter">, Flags<[CC1Option]>; -def fforce_experimental_new_constant_interpreter : Flag<["-"], "fforce-experimental-new-constant-interpreter">, Group, - HelpText<"Force the use of the experimental new constant interpreter, failing on missing features">, Flags<[CC1Option]>; def fconstexpr_backtrace_limit_EQ : Joined<["-"], "fconstexpr-backtrace-limit=">, Group; def fno_crash_diagnostics : Flag<["-"], "fno-crash-diagnostics">, Group, Flags<[NoArgumentUnused, CoreOption]>, @@ -953,6 +951,10 @@ def : Flag<["-"], "fextended-identifiers">, Group; def : Flag<["-"], "fno-extended-identifiers">, Group, Flags<[Unsupported]>; def fhosted : Flag<["-"], "fhosted">, Group; def fdenormal_fp_math_EQ : Joined<["-"], "fdenormal-fp-math=">, Group, Flags<[CC1Option]>; +def ffp_model_EQ : Joined<["-"], "ffp-model=">, Group, Flags<[DriverOption]>, + HelpText<"Controls the semantics of floating-point calculations.">; +def ffp_exception_behavior_EQ : Joined<["-"], "ffp-exception-behavior=">, Group, Flags<[CC1Option]>, + HelpText<"Specifies the exception behavior of floating-point operations.">; def ffast_math : Flag<["-"], "ffast-math">, Group, Flags<[CC1Option]>, HelpText<"Allow aggressive, lossy floating-point optimizations">; def fno_fast_math : Flag<["-"], "fno-fast-math">, Group; @@ -1178,6 +1180,8 @@ def fno_honor_infinities : Flag<["-"], "fno-honor-infinities">, Group; // This option was originally misspelt "infinites" [sic]. 
def : Flag<["-"], "fhonor-infinites">, Alias; def : Flag<["-"], "fno-honor-infinites">, Alias; +def frounding_math : Flag<["-"], "frounding-math">, Group, Flags<[CC1Option]>; +def fno_rounding_math : Flag<["-"], "fno-rounding-math">, Group, Flags<[CC1Option]>; def ftrapping_math : Flag<["-"], "ftrapping-math">, Group, Flags<[CC1Option]>; def fno_trapping_math : Flag<["-"], "fno-trapping-math">, Group, Flags<[CC1Option]>; def ffp_contract : Joined<["-"], "ffp-contract=">, Group, @@ -2023,6 +2027,12 @@ def fdebug_prefix_map_EQ : Joined<["-"], "fdebug-prefix-map=">, Group, Flags<[CC1Option,CC1AsOption]>, HelpText<"remap file source paths in debug info">; +def ffile_prefix_map_EQ + : Joined<["-"], "ffile-prefix-map=">, Group, Flags<[CC1Option]>, + HelpText<"remap file source paths in debug info and predefined preprocessor macros">; +def fmacro_prefix_map_EQ + : Joined<["-"], "fmacro-prefix-map=">, Group, Flags<[CC1Option]>, + HelpText<"remap file source paths in predefined preprocessor macros">; def fforce_dwarf_frame : Flag<["-"], "fforce-dwarf-frame">, Group, Flags<[CC1Option]>, HelpText<"Always emit a debug frame section">; def fno_force_dwarf_frame : Flag<["-"], "fno-force-dwarf-frame">, Group, Flags<[CC1Option]>, @@ -2322,9 +2332,8 @@ def mrestrict_it: Flag<["-"], "mrestrict-it">, Group, def mno_restrict_it: Flag<["-"], "mno-restrict-it">, Group, HelpText<"Allow generation of deprecated IT blocks for ARMv8. It is off by default for ARMv8 Thumb mode">; def marm : Flag<["-"], "marm">, Alias; -foreach i = {6-11} in - def ffixed_r#i : Flag<["-"], "ffixed-r"#i>, Group, - HelpText<"Reserve the r"#i#" register (ARM only)">; +def ffixed_r9 : Flag<["-"], "ffixed-r9">, Group, + HelpText<"Reserve the r9 register (ARM only)">; def mno_movt : Flag<["-"], "mno-movt">, Group, HelpText<"Disallow use of movt/movw pairs (ARM only)">; def mcrc : Flag<["-"], "mcrc">, Group, @@ -3298,7 +3307,6 @@ defm profile_values : BooleanFFlag<"profile-values">, Group, Group; defm rename_registers : BooleanFFlag<"rename-registers">, Group; defm ripa : BooleanFFlag<"ripa">, Group; -defm rounding_math : BooleanFFlag<"rounding-math">, Group; defm schedule_insns : BooleanFFlag<"schedule-insns">, Group; defm schedule_insns2 : BooleanFFlag<"schedule-insns2">, Group; defm see : BooleanFFlag<"see">, Group; diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 604a935f3a288..f17a10c7f5c80 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -1953,6 +1953,15 @@ struct FormatStyle { /// \endcode bool SpacesInAngles; + /// If ``true``, spaces will be inserted around if/for/switch/while + /// conditions. + /// \code + /// true: false: + /// if ( a ) { ... } vs. if (a) { ... } + /// while ( i < 5 ) { ... } while (i < 5) { ... } + /// \endcode + bool SpacesInConditionalStatement; + /// If ``true``, spaces are inserted inside container literals (e.g. /// ObjC and Javascript array and dict literals). 
/// \code{.js} @@ -2155,6 +2164,7 @@ struct FormatStyle { SpaceInEmptyParentheses == R.SpaceInEmptyParentheses && SpacesBeforeTrailingComments == R.SpacesBeforeTrailingComments && SpacesInAngles == R.SpacesInAngles && + SpacesInConditionalStatement == R.SpacesInConditionalStatement && SpacesInContainerLiterals == R.SpacesInContainerLiterals && SpacesInCStyleCastParentheses == R.SpacesInCStyleCastParentheses && SpacesInParentheses == R.SpacesInParentheses && diff --git a/clang/include/clang/Frontend/PrecompiledPreamble.h b/clang/include/clang/Frontend/PrecompiledPreamble.h index 1a8a64951ec49..5ae77735576cd 100644 --- a/clang/include/clang/Frontend/PrecompiledPreamble.h +++ b/clang/include/clang/Frontend/PrecompiledPreamble.h @@ -134,14 +134,6 @@ class PrecompiledPreamble { // A main method used to construct TempPCHFile. static llvm::ErrorOr<TempPCHFile> CreateNewPreamblePCHFile(); - /// Call llvm::sys::fs::createTemporaryFile to create a new temporary file. - static llvm::ErrorOr<TempPCHFile> createInSystemTempDir(const Twine &Prefix, StringRef Suffix); - /// Create a new instance of TemporaryFile for file at \p Path. Use with - /// extreme caution, there's an assertion checking that there's only a - /// single instance of TempPCHFile alive for each path. - static llvm::ErrorOr<TempPCHFile> createFromCustomPath(const Twine &Path); - private: TempPCHFile(std::string FilePath); diff --git a/clang/include/clang/Frontend/Utils.h b/clang/include/clang/Frontend/Utils.h index 09b69f85f35f9..e50e38c4844ea 100644 --- a/clang/include/clang/Frontend/Utils.h +++ b/clang/include/clang/Frontend/Utils.h @@ -218,14 +218,18 @@ createChainedIncludesSource(CompilerInstance &CI, /// non-null (and possibly incorrect) CompilerInvocation if any errors were /// encountered. When this flag is false, always return null on errors. /// -/// \return A CompilerInvocation, or 0 if none was built for the given +/// \param CC1Args - if non-null, will be populated with the args to cc1 +/// expanded from \p Args. May be set even if nullptr is returned. +/// +/// \return A CompilerInvocation, or nullptr if none was built for the given /// argument vector. std::unique_ptr<CompilerInvocation> createInvocationFromCommandLine( ArrayRef<const char *> Args, IntrusiveRefCntPtr<DiagnosticsEngine> Diags = IntrusiveRefCntPtr<DiagnosticsEngine>(), IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS = nullptr, - bool ShouldRecoverOnErrors = false); + bool ShouldRecoverOnErrors = false, + std::vector<std::string> *CC1Args = nullptr); /// Return the value of the last argument as an integer, or a default. If Diags /// is non-null, emits an error if the argument is given, but non-integral. diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index e2ddc80d503f1..9716196b95c23 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -932,6 +932,12 @@ class Preprocessor { return TheModuleLoader.HadFatalFailure; } + /// Retrieve the number of Directives that have been processed by the + /// Preprocessor. 
+ unsigned getNumDirectives() const { + return NumDirectives; + } + /// True if we are currently preprocessing a #if or #elif directive bool isParsingIfOrElifDirective() const { return ParsingIfOrElifDirective; diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index 344afa8941723..abffbd03c3b48 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -13,6 +13,8 @@ #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" +#include <functional> +#include <map> #include #include #include @@ -173,6 +175,9 @@ class PreprocessorOptions { /// build it again. std::shared_ptr<FailedModulesSet> FailedModules; + /// A prefix map for __FILE__ and __BASE_FILE__. + std::map<std::string, std::string, std::greater<std::string>> MacroPrefixMap; + /// Contains the currently active skipped range mappings for skipping excluded /// conditional directives. /// diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 74518278c1c7b..d271a65f48b41 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -1122,6 +1122,11 @@ class Parser : public CodeCompletionHandler { /// point for skipping past a simple-declaration. void SkipMalformedDecl(); + /// The location of the first statement inside an else that might + /// have a misleading indentation. If there is no + /// MisleadingIndentationChecker active on an else, this location is invalid. + SourceLocation MisleadingIndentationElseLoc; + private: //===--------------------------------------------------------------------===// // Lexing and parsing of C++ inline methods. diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 808113e1b9657..bab94c01117b3 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4035,6 +4035,9 @@ class Sema final { /// Add the given method to the list of globally-known methods. void addMethodToGlobalList(ObjCMethodList *List, ObjCMethodDecl *Method); + /// Returns default addr space for method qualifiers. + LangAS getDefaultCXXMethodAddrSpace() const; + private: /// AddMethodToGlobalPool - Add an instance or factory method to the global /// pool. See description of AddInstanceMethodToGlobalPool. 
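The MacroPrefixMap added to PreprocessorOptions above is deliberately keyed with std::greater, so iteration visits longer (more specific) prefixes before shorter ones. A minimal sketch of the kind of __FILE__ remapping this enables; the helper itself is hypothetical and not part of the patch:

#include <functional>
#include <map>
#include <string>

// Hypothetical helper: rewrite Path using the first matching map entry.
// Because the map is ordered by std::greater<std::string>, an entry for
// "/usr/src/project" is visited before one for "/usr/src", so the most
// specific mapping wins.
static std::string remapMacroPath(
    const std::string &Path,
    const std::map<std::string, std::string, std::greater<std::string>>
        &MacroPrefixMap) {
  for (const auto &Entry : MacroPrefixMap)
    if (Path.compare(0, Entry.first.size(), Entry.first) == 0)
      return Entry.second + Path.substr(Entry.first.size());
  return Path;
}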
@@ -4464,9 +4467,11 @@ typedef ProcessingContextState ParsingClassState; ParsingClassState PushParsingClass() { + ParsingClassDepth++; return DelayedDiagnostics.pushUndelayed(); } void PopParsingClass(ParsingClassState state) { + ParsingClassDepth--; DelayedDiagnostics.popUndelayed(state); } @@ -6519,7 +6524,7 @@ class Sema final { SourceLocation RBrac, const ParsedAttributesView &AttrList); void ActOnFinishCXXMemberDecls(); - void ActOnFinishCXXNonNestedClass(Decl *D); + void ActOnFinishCXXNonNestedClass(); void ActOnReenterCXXMethodParameter(Scope *S, ParmVarDecl *Param); unsigned ActOnReenterTemplateScope(Scope *S, Decl *Template); @@ -8897,6 +8902,8 @@ class Sema final { bool CheckARCMethodDecl(ObjCMethodDecl *method); bool inferObjCARCLifetime(ValueDecl *decl); + void deduceOpenCLAddressSpace(ValueDecl *decl); + ExprResult HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT, Expr *BaseExpr, @@ -11428,6 +11435,8 @@ class Sema final { bool CheckHexagonBuiltinCpu(unsigned BuiltinID, CallExpr *TheCall); bool CheckHexagonBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall); bool CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + bool CheckMipsBuiltinCpu(unsigned BuiltinID, CallExpr *TheCall); + bool CheckMipsBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall); bool CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, CallExpr *TheCall); @@ -11690,6 +11699,8 @@ class Sema final { SmallVector<CXXMethodDecl*, 4> DelayedDllExportMemberFunctions; private: + int ParsingClassDepth = 0; + class SavePendingParsedClassStateRAII { public: SavePendingParsedClassStateRAII(Sema &S) : S(S) { swapSavedState(); } @@ -11699,8 +11710,6 @@ class Sema final { "there shouldn't be any pending delayed exception spec checks"); assert(S.DelayedEquivalentExceptionSpecChecks.empty() && "there shouldn't be any pending delayed exception spec checks"); - assert(S.DelayedDllExportClasses.empty() && - "there shouldn't be any pending delayed DLL export classes"); swapSavedState(); } @@ -11710,14 +11719,12 @@ class Sema final { SavedOverridingExceptionSpecChecks; decltype(DelayedEquivalentExceptionSpecChecks) SavedEquivalentExceptionSpecChecks; - decltype(DelayedDllExportClasses) SavedDllExportClasses; void swapSavedState() { SavedOverridingExceptionSpecChecks.swap( S.DelayedOverridingExceptionSpecChecks); SavedEquivalentExceptionSpecChecks.swap( S.DelayedEquivalentExceptionSpecChecks); - SavedDllExportClasses.swap(S.DelayedDllExportClasses); } }; diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index f0b5e99338232..b6dae68b3413b 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -551,6 +551,14 @@ class ASTReader llvm::DenseMap> AnonymousDeclarationsForMerging; + /// Key used to identify LifetimeExtendedTemporaryDecl for merging, + /// containing the lifetime-extending declaration and the mangling number. + using LETemporaryKey = std::pair<ValueDecl *, unsigned>; + + /// Map of already deserialized temporaries. 
+ llvm::DenseMap<LETemporaryKey, LifetimeExtendedTemporaryDecl *> + LETemporaryForMerging; + struct FileDeclsInfo { ModuleFile *Mod = nullptr; ArrayRef<serialization::LocalDeclID> Decls; diff --git a/clang/include/clang/Tooling/CompilationDatabase.h b/clang/include/clang/Tooling/CompilationDatabase.h index dea046a2dc7c3..b28a8a6d6e51c 100644 --- a/clang/include/clang/Tooling/CompilationDatabase.h +++ b/clang/include/clang/Tooling/CompilationDatabase.h @@ -31,6 +31,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/VirtualFileSystem.h" #include #include #include @@ -219,6 +220,12 @@ std::unique_ptr<CompilationDatabase> std::unique_ptr<CompilationDatabase> inferTargetAndDriverMode(std::unique_ptr<CompilationDatabase> Base); +/// Returns a wrapped CompilationDatabase that will expand all rsp (response) +/// files on the command line returned by the underlying database. +std::unique_ptr<CompilationDatabase> +expandResponseFiles(std::unique_ptr<CompilationDatabase> Base, + llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS); + } // namespace tooling } // namespace clang diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h index c40b6bd24817f..c4db4da892c2d 100644 --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -37,7 +37,6 @@ namespace syntax { enum class NodeKind : uint16_t { Leaf, TranslationUnit, - TopLevelDeclaration, // Expressions UnknownExpression, @@ -57,7 +56,11 @@ enum class NodeKind : uint16_t { ReturnStatement, RangeBasedForStatement, ExpressionStatement, - CompoundStatement + CompoundStatement, + + // Declarations + UnknownDeclaration, + SimpleDeclaration, }; /// For debugging purposes. llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeKind K); @@ -102,20 +105,6 @@ class TranslationUnit final : public Tree { } }; -/// FIXME: this node is temporary and will be replaced with nodes for various -/// 'declarations' and 'declarators' from the C/C++ grammar -/// -/// Represents any top-level declaration. Only there to give the syntax tree a -/// bit of structure until we implement syntax nodes for declarations and -/// declarators. -class TopLevelDeclaration final : public Tree { -public: - TopLevelDeclaration() : Tree(NodeKind::TopLevelDeclaration) {} - static bool classof(const Node *N) { - return N->kind() == NodeKind::TopLevelDeclaration; - } -}; - /// A base class for all expressions. Note that expressions are not statements, /// even though they are in clang. class Expression : public Tree { @@ -313,6 +302,38 @@ class CompoundStatement final : public Statement { syntax::Leaf *rbrace(); }; +/// A declaration that can appear at the top-level. Note that this does *not* +/// correspond 1-to-1 to clang::Decl. Syntax trees distinguish between top-level +/// declarations (e.g. namespace definitions) and declarators (e.g. variables, +/// typedefs, etc.). Declarators are stored inside SimpleDeclaration. +class Declaration : public Tree { +public: + Declaration(NodeKind K) : Tree(K) {} + static bool classof(const Node *N) { + return NodeKind::UnknownDeclaration <= N->kind() && + N->kind() <= NodeKind::SimpleDeclaration; + } +}; + +/// Declaration of an unknown kind, e.g. not yet supported in syntax trees. +class UnknownDeclaration final : public Declaration { +public: + UnknownDeclaration() : Declaration(NodeKind::UnknownDeclaration) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownDeclaration; + } +}; + +/// Groups multiple declarators (e.g. variables, typedefs, etc.) together. All +/// grouped declarators share the same declaration specifiers (e.g. 'int' or +/// 'typedef'). 
+class SimpleDeclaration final : public Declaration { +public: + SimpleDeclaration() : Declaration(NodeKind::SimpleDeclaration) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::SimpleDeclaration; + } +}; } // namespace syntax } // namespace clang #endif diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h index 301432d3888b3..6f4d0e0c050af 100644 --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -175,6 +175,7 @@ class TokenBuffer { /// All tokens produced by the preprocessor after all macro replacements, /// directives, etc. Source locations found in the clang AST will always /// point to one of these tokens. + /// Tokens are in TU order (per SourceManager::isBeforeInTranslationUnit()). /// FIXME: figure out how to handle token splitting, e.g. '>>' can be split /// into two '>' tokens by the parser. However, TokenBuffer currently /// keeps it as a single '>>' token. @@ -182,6 +183,10 @@ class TokenBuffer { return ExpandedTokens; } + /// Returns the subrange of expandedTokens() corresponding to the closed + /// token range R. + llvm::ArrayRef<syntax::Token> expandedTokens(SourceRange R) const; + /// Find the subrange of spelled tokens that produced the corresponding \p /// Expanded tokens. /// diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index bd29a6991afe5..2ed523b741b15 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -771,6 +771,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { if (!LangOpts.CPlusPlus) return nullptr; switch (T.getCXXABI().getKind()) { + case TargetCXXABI::Fuchsia: case TargetCXXABI::GenericARM: // Same as Itanium at this level case TargetCXXABI::iOS: case TargetCXXABI::iOS64: @@ -3880,10 +3881,11 @@ QualType ASTContext::getFunctionTypeInternal( auto ESH = FunctionProtoType::getExceptionSpecSize( EPI.ExceptionSpec.Type, EPI.ExceptionSpec.Exceptions.size()); size_t Size = FunctionProtoType::totalSizeToAlloc< - QualType, FunctionType::FunctionTypeExtraBitfields, + QualType, SourceLocation, FunctionType::FunctionTypeExtraBitfields, FunctionType::ExceptionType, Expr *, FunctionDecl *, FunctionProtoType::ExtParameterInfo, Qualifiers>( - NumArgs, FunctionProtoType::hasExtraBitfields(EPI.ExceptionSpec.Type), + NumArgs, EPI.Variadic, + FunctionProtoType::hasExtraBitfields(EPI.ExceptionSpec.Type), ESH.NumExceptionType, ESH.NumExprPtr, ESH.NumFunctionDeclPtr, EPI.ExtParameterInfos ? NumArgs : 0, EPI.TypeQuals.hasNonFastQualifiers() ? 
1 : 0); @@ -10177,6 +10179,7 @@ MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { if (!T) T = Target; switch (T->getCXXABI().getKind()) { + case TargetCXXABI::Fuchsia: case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 3723c868004fe..0301110b7067f 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2793,6 +2793,7 @@ FunctionDecl::FunctionDecl(Kind DK, ASTContext &C, DeclContext *DC, FunctionDeclBits.ConstexprKind = ConstexprKind; FunctionDeclBits.InstantiationIsPending = false; FunctionDeclBits.UsesSEHTry = false; + FunctionDeclBits.UsesFPIntrin = false; FunctionDeclBits.HasSkippedBody = false; FunctionDeclBits.WillHaveBody = false; FunctionDeclBits.IsMultiVersion = false; @@ -3356,6 +3357,22 @@ SourceRange FunctionDecl::getReturnTypeSourceRange() const { return RTRange; } +SourceRange FunctionDecl::getParametersSourceRange() const { + unsigned NP = getNumParams(); + SourceLocation EllipsisLoc = getEllipsisLoc(); + + if (NP == 0 && EllipsisLoc.isInvalid()) + return SourceRange(); + + SourceLocation Begin = + NP > 0 ? ParamInfo[0]->getSourceRange().getBegin() : EllipsisLoc; + SourceLocation End = EllipsisLoc.isValid() + ? EllipsisLoc + : ParamInfo[NP - 1]->getSourceRange().getEnd(); + + return SourceRange(Begin, End); +} + SourceRange FunctionDecl::getExceptionSpecSourceRange() const { FunctionTypeLoc FTL = getFunctionTypeLoc(); return FTL ? FTL.getExceptionSpecRange() : SourceRange(); diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index c4fd5cd1c3c65..03a6d8c9bcff2 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1754,6 +1754,15 @@ MemberExpr *MemberExpr::Create( MemberExpr *E = new (Mem) MemberExpr(Base, IsArrow, OperatorLoc, MemberDecl, NameInfo, T, VK, OK, NOUR); + if (isa<FieldDecl>(MemberDecl)) { + DeclContext *DC = MemberDecl->getDeclContext(); + // dyn_cast_or_null is used to handle objC variables which do not + // have a declaration context. + CXXRecordDecl *RD = dyn_cast_or_null<CXXRecordDecl>(DC); + if (RD && RD->isDependentContext() && RD->isCurrentInstantiation(DC)) + E->setTypeDependent(T->isDependentType()); + } + if (HasQualOrFound) { // FIXME: Wrong. We should be looking at the member declaration we found. if (QualifierLoc && QualifierLoc.getNestedNameSpecifier()->isDependent()) { @@ -1890,7 +1899,7 @@ bool CastExpr::CastConsistency() const { auto Ty = getType(); auto SETy = getSubExpr()->getType(); assert(getValueKindForType(Ty) == Expr::getValueKindForType(SETy)); - if (/*isRValue()*/ !Ty->getPointeeType().isNull()) { + if (isRValue()) { Ty = Ty->getPointeeType(); SETy = SETy->getPointeeType(); } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 79659261388b4..7a17b76f05d3d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -763,11 +763,8 @@ namespace { /// we will evaluate. unsigned StepsLeft; - /// Force the use of the experimental new constant interpreter, bailing out - /// with an error if a feature is not supported. - bool ForceNewConstInterp; - - /// Enable the experimental new constant interpreter. + /// Enable the experimental new constant interpreter. If an expression is + /// not supported by the interpreter, an error is triggered. bool EnableNewConstInterp; /// BottomFrame - The frame in which evaluation started. 
This must be @@ -921,10 +918,8 @@ namespace { EvalInfo(const ASTContext &C, Expr::EvalStatus &S, EvaluationMode Mode) : Ctx(const_cast<ASTContext &>(C)), EvalStatus(S), CurrentCall(nullptr), CallStackDepth(0), NextCallIndex(1), - StepsLeft(getLangOpts().ConstexprStepLimit), - ForceNewConstInterp(getLangOpts().ForceNewConstInterp), - EnableNewConstInterp(ForceNewConstInterp || - getLangOpts().EnableNewConstInterp), + StepsLeft(C.getLangOpts().ConstexprStepLimit), + EnableNewConstInterp(C.getLangOpts().EnableNewConstInterp), BottomFrame(*this, SourceLocation(), nullptr, nullptr, nullptr), EvaluatingDecl((const ValueDecl *)nullptr), EvaluatingDeclValue(nullptr), HasActiveDiagnostic(false), @@ -7866,6 +7861,11 @@ class PointerExprEvaluator // either copied into the closure object's field that represents the '*this' // or refers to '*this'. if (isLambdaCallOperator(Info.CurrentCall->Callee)) { + // Ensure we actually have captured 'this'. (an error will have + // been previously reported if not). + if (!Info.CurrentCall->LambdaThisCaptureField) + return false; + // Update 'Result' to refer to the data member/field of the closure object // that represents the '*this' capture. if (!HandleLValueMember(Info, E, Result, @@ -13400,32 +13400,25 @@ static bool EvaluateInPlace(APValue &Result, EvalInfo &Info, const LValue &This, /// EvaluateAsRValue - Try to evaluate this expression, performing an implicit /// lvalue-to-rvalue cast if it is an lvalue. static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result) { - if (Info.EnableNewConstInterp) { - auto &InterpCtx = Info.Ctx.getInterpContext(); - switch (InterpCtx.evaluateAsRValue(Info, E, Result)) { - case interp::InterpResult::Success: - return true; - case interp::InterpResult::Fail: + if (Info.EnableNewConstInterp) { + if (!Info.Ctx.getInterpContext().evaluateAsRValue(Info, E, Result)) + return false; + } else { + if (E->getType().isNull()) return false; - case interp::InterpResult::Bail: - break; - } - } - - if (E->getType().isNull()) - return false; - - if (!CheckLiteralType(Info, E)) - return false; - if (!::Evaluate(Result, Info, E)) - return false; + if (!CheckLiteralType(Info, E)) + return false; - if (E->isGLValue()) { - LValue LV; - LV.setFrom(Info.Ctx, Result); - if (!handleLValueToRValueConversion(Info, E, E->getType(), LV, Result)) + if (!::Evaluate(Result, Info, E)) return false; + + if (E->isGLValue()) { + LValue LV; + LV.setFrom(Info.Ctx, Result); + if (!handleLValueToRValueConversion(Info, E, E->getType(), LV, Result)) + return false; + } } // Check this core constant expression is a constant expression. @@ -13637,46 +13630,36 @@ bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, if (Info.EnableNewConstInterp) { auto &InterpCtx = const_cast<ASTContext &>(Ctx).getInterpContext(); - switch (InterpCtx.evaluateAsInitializer(Info, VD, Value)) { - case interp::InterpResult::Fail: - // Bail out if an error was encountered. - return false; - case interp::InterpResult::Success: - // Evaluation succeeded and value was set. - return CheckConstantExpression(Info, DeclLoc, DeclTy, Value); - case interp::InterpResult::Bail: - // Evaluate the value again for the tree evaluator to use. - break; + if (!InterpCtx.evaluateAsInitializer(Info, VD, Value)) + return false; + } else { + LValue LVal; + LVal.set(VD); + + // C++11 [basic.start.init]p2: + // Variables with static storage duration or thread storage duration shall + // be zero-initialized before any other initialization takes place. + // This behavior is not present in C. 
+ if (Ctx.getLangOpts().CPlusPlus && !VD->hasLocalStorage() && + !DeclTy->isReferenceType()) { + ImplicitValueInitExpr VIE(DeclTy); + if (!EvaluateInPlace(Value, Info, LVal, &VIE, + /*AllowNonLiteralTypes=*/true)) + return false; } - } - - LValue LVal; - LVal.set(VD); - // C++11 [basic.start.init]p2: - // Variables with static storage duration or thread storage duration shall be - // zero-initialized before any other initialization takes place. - // This behavior is not present in C. - if (Ctx.getLangOpts().CPlusPlus && !VD->hasLocalStorage() && - !DeclTy->isReferenceType()) { - ImplicitValueInitExpr VIE(DeclTy); - if (!EvaluateInPlace(Value, Info, LVal, &VIE, - /*AllowNonLiteralTypes=*/true)) + if (!EvaluateInPlace(Value, Info, LVal, this, + /*AllowNonLiteralTypes=*/true) || + EStatus.HasSideEffects) return false; - } - - if (!EvaluateInPlace(Value, Info, LVal, this, - /*AllowNonLiteralTypes=*/true) || - EStatus.HasSideEffects) - return false; - - // At this point, any lifetime-extended temporaries are completely - // initialized. - Info.performLifetimeExtension(); - if (!Info.discardCleanups()) - llvm_unreachable("Unhandled cleanup; missing full expression marker?"); + // At this point, any lifetime-extended temporaries are completely + // initialized. + Info.performLifetimeExtension(); + if (!Info.discardCleanups()) + llvm_unreachable("Unhandled cleanup; missing full expression marker?"); + } return CheckConstantExpression(Info, DeclLoc, DeclTy, Value) && CheckMemoryLeaks(Info); } @@ -14415,14 +14398,8 @@ bool Expr::isPotentialConstantExpr(const FunctionDecl *FD, // The constexpr VM attempts to compile all methods to bytecode here. if (Info.EnableNewConstInterp) { - auto &InterpCtx = Info.Ctx.getInterpContext(); - switch (InterpCtx.isPotentialConstantExpr(Info, FD)) { - case interp::InterpResult::Success: - case interp::InterpResult::Fail: - return Diags.empty(); - case interp::InterpResult::Bail: - break; - } + Info.Ctx.getInterpContext().isPotentialConstantExpr(Info, FD); + return Diags.empty(); } const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD); diff --git a/clang/lib/AST/Interp/Context.cpp b/clang/lib/AST/Interp/Context.cpp index 4f8f7b96e7c32..e7f9ba0f010ae 100644 --- a/clang/lib/AST/Interp/Context.cpp +++ b/clang/lib/AST/Interp/Context.cpp @@ -21,44 +21,37 @@ using namespace clang; using namespace clang::interp; -Context::Context(ASTContext &Ctx) - : Ctx(Ctx), ForceInterp(getLangOpts().ForceNewConstInterp), - P(new Program(*this)) {} +Context::Context(ASTContext &Ctx) : Ctx(Ctx), P(new Program(*this)) {} Context::~Context() {} -InterpResult Context::isPotentialConstantExpr(State &Parent, - const FunctionDecl *FD) { +bool Context::isPotentialConstantExpr(State &Parent, const FunctionDecl *FD) { Function *Func = P->getFunction(FD); if (!Func) { if (auto R = ByteCodeStmtGen<ByteCodeEmitter>(*this, *P).compileFunc(FD)) { Func = *R; - } else if (ForceInterp) { + } else { handleAllErrors(R.takeError(), [&Parent](ByteCodeGenError &Err) { Parent.FFDiag(Err.getLoc(), diag::err_experimental_clang_interp_failed); }); - return InterpResult::Fail; - } else { - consumeError(R.takeError()); - return InterpResult::Bail; + return false; } } if (!Func->isConstexpr()) - return InterpResult::Fail; + return false; APValue Dummy; return Run(Parent, Func, Dummy); } -InterpResult Context::evaluateAsRValue(State &Parent, const Expr *E, - APValue &Result) { +bool Context::evaluateAsRValue(State &Parent, const Expr *E, APValue &Result) { ByteCodeExprGen<EvalEmitter> C(*this, *P, Parent, Stk, Result); return Check(Parent, C.interpretExpr(E)); } 
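The Interp/Context changes above collapse the old three-state InterpResult into a plain bool, with llvm::Expected<bool> carrying bytecode-generation failures into Check. For readers unfamiliar with the idiom, a small self-contained sketch of the unwrap-or-report pattern the new Check follows (the function name is hypothetical):

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical stand-in for Context::Check: on success, unwrap the inner
// bool; on failure, consume the Error exactly once and map it to false.
static bool checkResult(llvm::Expected<bool> Flag) {
  if (Flag)
    return *Flag; // bytecode was generated; *Flag says whether it ran OK
  llvm::handleAllErrors(Flag.takeError(),
                        [](const llvm::ErrorInfoBase &EIB) {
                          llvm::errs() << EIB.message() << "\n";
                        });
  return false;
}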
-InterpResult Context::evaluateAsInitializer(State &Parent, const VarDecl *VD, - APValue &Result) { +bool Context::evaluateAsInitializer(State &Parent, const VarDecl *VD, + APValue &Result) { ByteCodeExprGen<EvalEmitter> C(*this, *P, Parent, Stk, Result); return Check(Parent, C.interpretDecl(VD)); } @@ -116,33 +109,20 @@ unsigned Context::getCharBit() const { return Ctx.getTargetInfo().getCharWidth(); } -InterpResult Context::Run(State &Parent, Function *Func, APValue &Result) { - InterpResult Flag; - { - InterpState State(Parent, *P, Stk, *this); - State.Current = new InterpFrame(State, Func, nullptr, {}, {}); - if (Interpret(State, Result)) { - Flag = InterpResult::Success; - } else { - Flag = InterpResult::Fail; - } - } - - if (Flag != InterpResult::Success) - Stk.clear(); - return Flag; +bool Context::Run(State &Parent, Function *Func, APValue &Result) { + InterpState State(Parent, *P, Stk, *this); + State.Current = new InterpFrame(State, Func, nullptr, {}, {}); + if (Interpret(State, Result)) + return true; + Stk.clear(); + return false; } -InterpResult Context::Check(State &Parent, llvm::Expected<bool> &&R) { - if (R) { - return *R ? InterpResult::Success : InterpResult::Fail; - } else if (ForceInterp) { - handleAllErrors(R.takeError(), [&Parent](ByteCodeGenError &Err) { - Parent.FFDiag(Err.getLoc(), diag::err_experimental_clang_interp_failed); - }); - return InterpResult::Fail; - } else { - consumeError(R.takeError()); - return InterpResult::Bail; - } +bool Context::Check(State &Parent, llvm::Expected<bool> &&Flag) { + if (Flag) + return *Flag; + handleAllErrors(Flag.takeError(), [&Parent](ByteCodeGenError &Err) { + Parent.FFDiag(Err.getLoc(), diag::err_experimental_clang_interp_failed); + }); + return false; } diff --git a/clang/lib/AST/Interp/Context.h b/clang/lib/AST/Interp/Context.h index 96368b6e5f02f..e4d831cbb9912 100644 --- a/clang/lib/AST/Interp/Context.h +++ b/clang/lib/AST/Interp/Context.h @@ -34,16 +34,6 @@ class Program; class State; enum PrimType : unsigned; -/// Wrapper around interpreter termination results. -enum class InterpResult { - /// Interpreter successfully computed a value. - Success, - /// Interpreter encountered an error and quit. - Fail, - /// Interpreter encountered an unimplemented feature, AST fallback. - Bail, -}; - /// Holds all information required to evaluate constexpr code in a module. class Context { public: @@ -54,15 +44,13 @@ class Context { ~Context(); /// Checks if a function is a potential constant expression. - InterpResult isPotentialConstantExpr(State &Parent, - const FunctionDecl *FnDecl); + bool isPotentialConstantExpr(State &Parent, const FunctionDecl *FnDecl); /// Evaluates a toplevel expression as an rvalue. - InterpResult evaluateAsRValue(State &Parent, const Expr *E, APValue &Result); + bool evaluateAsRValue(State &Parent, const Expr *E, APValue &Result); /// Evaluates a toplevel initializer. - InterpResult evaluateAsInitializer(State &Parent, const VarDecl *VD, - APValue &Result); + bool evaluateAsInitializer(State &Parent, const VarDecl *VD, APValue &Result); /// Returns the AST context. ASTContext &getASTContext() const { return Ctx; } @@ -78,16 +66,14 @@ class Context { private: /// Runs a function. - InterpResult Run(State &Parent, Function *Func, APValue &Result); + bool Run(State &Parent, Function *Func, APValue &Result); /// Checks a result from the interpreter. - InterpResult Check(State &Parent, llvm::Expected<bool> &&R); + bool Check(State &Parent, llvm::Expected<bool> &&R); private: /// Current compilation context. 
ASTContext &Ctx; - /// Flag to indicate if the use of the interpreter is mandatory. - bool ForceInterp; /// Interpreter stack, shared across invocations. InterpStack Stk; /// Constexpr program. diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp index 274cc25b8bb8d..40c6c8375a606 100644 --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -202,14 +202,20 @@ void JSONNodeDumper::writeBareSourceLocation(SourceLocation Loc, PresumedLoc Presumed = SM.getPresumedLoc(Loc); unsigned ActualLine = IsSpelling ? SM.getSpellingLineNumber(Loc) : SM.getExpansionLineNumber(Loc); + StringRef ActualFile = SM.getBufferName(Loc); + if (Presumed.isValid()) { JOS.attribute("offset", SM.getDecomposedLoc(Loc).second); - if (LastLocFilename != Presumed.getFilename()) { - JOS.attribute("file", Presumed.getFilename()); + if (LastLocFilename != ActualFile) { + JOS.attribute("file", ActualFile); JOS.attribute("line", ActualLine); } else if (LastLocLine != ActualLine) JOS.attribute("line", ActualLine); + StringRef PresumedFile = Presumed.getFilename(); + if (PresumedFile != ActualFile && LastLocPresumedFilename != PresumedFile) + JOS.attribute("presumedFile", PresumedFile); + unsigned PresumedLine = Presumed.getLine(); if (ActualLine != PresumedLine && LastLocPresumedLine != PresumedLine) JOS.attribute("presumedLine", PresumedLine); @@ -217,7 +223,8 @@ void JSONNodeDumper::writeBareSourceLocation(SourceLocation Loc, JOS.attribute("col", Presumed.getColumn()); JOS.attribute("tokLen", Lexer::MeasureTokenLength(Loc, SM, Ctx.getLangOpts())); - LastLocFilename = Presumed.getFilename(); + LastLocFilename = ActualFile; + LastLocPresumedFilename = PresumedFile; LastLocPresumedLine = PresumedLine; LastLocLine = ActualLine; diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 0ff95213118fd..561c76a45cbc2 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1338,6 +1338,17 @@ void TextNodeDumper::VisitFunctionDecl(const FunctionDecl *D) { OS << " <<<NULL params x " << D->getNumParams() << ">>>"; } +void TextNodeDumper::VisitLifetimeExtendedTemporaryDecl( + const LifetimeExtendedTemporaryDecl *D) { + OS << " extended by "; + dumpBareDeclRef(D->getExtendingDecl()); + OS << " mangling "; + { + ColorScope Color(OS, ShowColors, ValueColor); + OS << D->getManglingNumber(); + } +} + void TextNodeDumper::VisitFieldDecl(const FieldDecl *D) { dumpName(D); dumpType(D->getType()); diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 4fed5b410b172..2eae2ebb61741 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3065,6 +3065,12 @@ FunctionProtoType::FunctionProtoType(QualType result, ArrayRef<QualType> params, } else { FunctionTypeBits.HasExtQuals = 0; } + + // Fill in the Ellipsis location info if present. + if (epi.Variadic) { + auto &EllipsisLoc = *getTrailingObjects<SourceLocation>(); + EllipsisLoc = epi.EllipsisLoc; + } } bool FunctionProtoType::hasDependentExceptionSpec() const { diff --git a/clang/lib/Analysis/BodyFarm.cpp b/clang/lib/Analysis/BodyFarm.cpp index 694913b3ac937..1a7891550542d 100644 --- a/clang/lib/Analysis/BodyFarm.cpp +++ b/clang/lib/Analysis/BodyFarm.cpp @@ -741,13 +741,17 @@ static Stmt *createObjCPropertyGetter(ASTContext &Ctx, // First, find the backing ivar. const ObjCIvarDecl *IVar = nullptr; - // Property accessor stubs sometimes do not correspond to any property. + // Property accessor stubs sometimes do not correspond to any property decl + // in the current interface (but in a superclass). 
They still have a + // corresponding property impl decl in this case. if (MD->isSynthesizedAccessorStub()) { const ObjCInterfaceDecl *IntD = MD->getClassInterface(); const ObjCImplementationDecl *ImpD = IntD->getImplementation(); - for (const auto *V: ImpD->ivars()) { - if (V->getName() == MD->getSelector().getNameForSlot(0)) - IVar = V; + for (const auto *PI: ImpD->property_impls()) { + if (const ObjCPropertyDecl *P = PI->getPropertyDecl()) { + if (P->getGetterName() == MD->getSelector()) + IVar = P->getPropertyIvarDecl(); + } } } diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 5214f7c30ee0a..cba3e3ada7ea5 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -158,6 +158,7 @@ void AArch64TargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts, void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts, MacroBuilder &Builder) const { + Builder.defineMacro("__ARM_FEATURE_COMPLEX", "1"); Builder.defineMacro("__ARM_FEATURE_JCVT", "1"); // Also include the Armv8.2 defines getTargetDefinesARMV82A(Opts, Builder); diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 92e5e26eba3c2..be088e81cffe4 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -580,6 +580,13 @@ void ARMTargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts, getTargetDefinesARMV81A(Opts, Builder); } +void ARMTargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts, + MacroBuilder &Builder) const { + // Also include the ARMv8.2-A defines + Builder.defineMacro("__ARM_FEATURE_COMPLEX", "1"); + getTargetDefinesARMV82A(Opts, Builder); +} + void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { // Target identification. @@ -809,6 +816,11 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, case llvm::ARM::ArchKind::ARMV8_2A: getTargetDefinesARMV82A(Opts, Builder); break; + case llvm::ARM::ArchKind::ARMV8_3A: + case llvm::ARM::ArchKind::ARMV8_4A: + case llvm::ARM::ArchKind::ARMV8_5A: + getTargetDefinesARMV83A(Opts, Builder); + break; } } @@ -879,38 +891,6 @@ ArrayRef ARMTargetInfo::getGCCRegAliases() const { return llvm::makeArrayRef(GCCRegAliases); } -bool ARMTargetInfo::validateGlobalRegisterVariable( - StringRef RegName, unsigned RegSize, bool &HasSizeMismatch) const { - bool isValid = llvm::StringSwitch(RegName) - .Case("r6", true) - .Case("r7", true) - .Case("r8", true) - .Case("r9", true) - .Case("r10", true) - .Case("r11", true) - .Case("sp", true) - .Default(false); - HasSizeMismatch = false; - return isValid; -} - -bool ARMTargetInfo::isRegisterReservedGlobally(StringRef RegName) const { - // The "sp" register does not have a -ffixed-sp option, - // so reserve it unconditionally. 
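With Armv8.3-A and later now routed through getTargetDefinesARMV83A on both AArch64 and 32-bit ARM, __ARM_FEATURE_COMPLEX becomes the ACLE feature gate for the complex-arithmetic intrinsics that the CGBuiltin.cpp tables below wire up to the vcadd family. A usage sketch; the intrinsic spelling here is the ACLE name and should be checked against your toolchain's arm_neon.h:

    #include <arm_neon.h>

    // Complex addition with the second operand rotated by 90 degrees,
    // available when the target advertises the v8.3-A complex extension.
    float32x2_t rotated_add(float32x2_t a, float32x2_t b) {
    #if defined(__ARM_FEATURE_COMPLEX)
      return vcadd_rot90_f32(a, b);
    #else
      return vadd_f32(a, b); // plain addition as a fallback
    #endif
    }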
- if (RegName.equals("sp")) - return true; - - // reserve rN (N:6-11) registers only if the corresponding - // +reserve-rN feature is found - const std::vector &Features = getTargetOpts().Features; - const std::string SearchFeature = "+reserve-" + RegName.str(); - for (const std::string &Feature : Features) { - if (Feature.compare(SearchFeature) == 0) - return true; - } - return false; -} - bool ARMTargetInfo::validateAsmConstraint( const char *&Name, TargetInfo::ConstraintInfo &Info) const { switch (*Name) { diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index 90fb20f8f7a5f..9696a44045891 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -148,9 +148,10 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { void getTargetDefinesARMV81A(const LangOptions &Opts, MacroBuilder &Builder) const; - void getTargetDefinesARMV82A(const LangOptions &Opts, MacroBuilder &Builder) const; + void getTargetDefinesARMV83A(const LangOptions &Opts, + MacroBuilder &Builder) const; void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; @@ -161,9 +162,6 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { ArrayRef getGCCRegNames() const override; ArrayRef getGCCRegAliases() const override; - bool validateGlobalRegisterVariable(StringRef RegName, unsigned RegSize, - bool &HasSizeMismatch) const override; - bool isRegisterReservedGlobally(StringRef RegName) const override; bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &Info) const override; std::string convertConstraint(const char *&Constraint) const override; diff --git a/clang/lib/Basic/Targets/Mips.cpp b/clang/lib/Basic/Targets/Mips.cpp index b9ab80df61940..ead5e91f7c8f2 100644 --- a/clang/lib/Basic/Targets/Mips.cpp +++ b/clang/lib/Basic/Targets/Mips.cpp @@ -213,7 +213,10 @@ void MipsTargetInfo::getTargetDefines(const LangOptions &Opts, bool MipsTargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch(Feature) .Case("mips", true) + .Case("dsp", DspRev >= DSP1) + .Case("dspr2", DspRev >= DSP2) .Case("fp64", FPMode == FP64) + .Case("msa", HasMSA) .Default(false); } diff --git a/clang/lib/Basic/Targets/OSTargets.cpp b/clang/lib/Basic/Targets/OSTargets.cpp index 72fdb0e7dde8a..d4ffffc64ba8d 100644 --- a/clang/lib/Basic/Targets/OSTargets.cpp +++ b/clang/lib/Basic/Targets/OSTargets.cpp @@ -180,7 +180,7 @@ static void addVisualCDefines(const LangOptions &Opts, MacroBuilder &Builder) { if (Opts.isCompatibleWithMSVC(LangOptions::MSVC2015)) { if (Opts.CPlusPlus2a) - Builder.defineMacro("_MSVC_LANG", "201704L"); + Builder.defineMacro("_MSVC_LANG", "201705L"); else if (Opts.CPlusPlus17) Builder.defineMacro("_MSVC_LANG", "201703L"); else if (Opts.CPlusPlus14) diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index cc72a0a39f30f..756cb7a8bbe3c 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -808,6 +808,7 @@ class LLVM_LIBRARY_VISIBILITY FuchsiaTargetInfo : public OSTargetInfo { FuchsiaTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->MCountName = "__mcount"; + this->TheCXXABI.set(TargetCXXABI::Fuchsia); } }; diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 039fe6da84201..d07aaf58681c2 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -139,7 +139,7 @@ namespace { const LValue &getAtomicLValue() const { return 
LVal; } llvm::Value *getAtomicPointer() const { if (LVal.isSimple()) - return LVal.getPointer(); + return LVal.getPointer(CGF); else if (LVal.isBitField()) return LVal.getBitFieldPointer(); else if (LVal.isVectorElt()) @@ -343,7 +343,7 @@ bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const { bool AtomicInfo::emitMemSetZeroIfNecessary() const { assert(LVal.isSimple()); - llvm::Value *addr = LVal.getPointer(); + llvm::Value *addr = LVal.getPointer(CGF); if (!requiresMemSetZero(addr->getType()->getPointerElementType())) return false; @@ -1628,7 +1628,7 @@ Address AtomicInfo::materializeRValue(RValue rvalue) const { LValue TempLV = CGF.MakeAddrLValue(CreateTempAlloca(), getAtomicType()); AtomicInfo Atomics(CGF, TempLV); Atomics.emitCopyIntoMemory(rvalue); - return TempLV.getAddress(); + return TempLV.getAddress(CGF); } llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal) const { @@ -1975,8 +1975,8 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, // If this is an aggregate r-value, it should agree in type except // maybe for address-space qualification. assert(!rvalue.isAggregate() || - rvalue.getAggregateAddress().getElementType() - == dest.getAddress().getElementType()); + rvalue.getAggregateAddress().getElementType() == + dest.getAddress(*this).getElementType()); AtomicInfo atomics(*this, dest); LValue LVal = atomics.getAtomicLValue(); @@ -2043,10 +2043,10 @@ std::pair CodeGenFunction::EmitAtomicCompareExchange( // maybe for address-space qualification. assert(!Expected.isAggregate() || Expected.getAggregateAddress().getElementType() == - Obj.getAddress().getElementType()); + Obj.getAddress(*this).getElementType()); assert(!Desired.isAggregate() || Desired.getAggregateAddress().getElementType() == - Obj.getAddress().getElementType()); + Obj.getAddress(*this).getElementType()); AtomicInfo Atomics(*this, Obj); return Atomics.EmitAtomicCompareExchange(Expected, Desired, Success, Failure, @@ -2086,13 +2086,11 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) { } // Evaluate the expression directly into the destination. - AggValueSlot slot = AggValueSlot::forLValue(dest, - AggValueSlot::IsNotDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - AggValueSlot::DoesNotOverlap, - Zeroed ? AggValueSlot::IsZeroed : - AggValueSlot::IsNotZeroed); + AggValueSlot slot = AggValueSlot::forLValue( + dest, *this, AggValueSlot::IsNotDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap, + Zeroed ? 
AggValueSlot::IsZeroed : AggValueSlot::IsNotZeroed); EmitAggExpr(init, slot); return; diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index f90d9439af257..6a1a73955319c 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1076,7 +1076,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), type.getNonReferenceType(), VK_LValue, SourceLocation()); - src = EmitDeclRefLValue(&declRef).getAddress(); + src = EmitDeclRefLValue(&declRef).getAddress(*this); }; // For byrefs, we just write the pointer to the byref struct into diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 676ea85e89e02..9b5fe9530210f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3367,7 +3367,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Carry); } case Builtin::BI__builtin_addressof: - return RValue::get(EmitLValue(E->getArg(0)).getPointer()); + return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); case Builtin::BI__builtin_operator_new: return EmitBuiltinNewDeleteCall( E->getCallee()->getType()->castAs(), E, false); @@ -3750,8 +3750,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); - llvm::Value *Range = NDRangeL.getAddress().getPointer(); - llvm::Type *RangeTy = NDRangeL.getAddress().getType(); + llvm::Value *Range = NDRangeL.getAddress(*this).getPointer(); + llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType(); if (NumArgs == 4) { // The most basic form of the call with parameters: @@ -3770,7 +3770,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); AttrBuilder B; - B.addByValAttr(NDRangeL.getAddress().getElementType()); + B.addByValAttr(NDRangeL.getAddress(*this).getElementType()); llvm::AttributeList ByValAttrSet = llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); @@ -3955,7 +3955,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); - llvm::Value *NDRange = NDRangeL.getAddress().getPointer(); + llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer(); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); @@ -4458,6 +4458,10 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), + NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcage_v, arm_neon_vacge, 0), NEONMAP1(vcageq_v, arm_neon_vacge, 0), NEONMAP1(vcagt_v, arm_neon_vacgt, 0), @@ -4625,10 +4629,10 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 
NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), - NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), - NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), - NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), - NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), + NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), + NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), + NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0), + NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0), NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), @@ -4646,8 +4650,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), - NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), - NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), + NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), + NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), @@ -4731,6 +4735,10 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), + NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcage_v, aarch64_neon_facge, 0), NEONMAP1(vcageq_v, aarch64_neon_facge, 0), NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), @@ -9466,14 +9474,14 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, if (!getDebugInfo()) { CGM.Error(E->getExprLoc(), "using builtin_preserve_field_info() without -g"); return IsBitField ? EmitLValue(Arg).getBitFieldPointer() - : EmitLValue(Arg).getPointer(); + : EmitLValue(Arg).getPointer(*this); } // Enable underlying preserve_*_access_index() generation. bool OldIsInPreservedAIRegion = IsInPreservedAIRegion; IsInPreservedAIRegion = true; Value *FieldAddr = IsBitField ? 
EmitLValue(Arg).getBitFieldPointer() - : EmitLValue(Arg).getPointer(); + : EmitLValue(Arg).getPointer(*this); IsInPreservedAIRegion = OldIsInPreservedAIRegion; ConstantInt *C = cast(EmitScalarExpr(E->getArg(1))); diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 3d292f84c79b4..fe778e3714347 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1020,13 +1020,13 @@ void CodeGenFunction::ExpandTypeFromArgs( auto Exp = getTypeExpansion(Ty, getContext()); if (auto CAExp = dyn_cast(Exp.get())) { - forConstantArrayExpansion(*this, CAExp, LV.getAddress(), - [&](Address EltAddr) { - LValue LV = MakeAddrLValue(EltAddr, CAExp->EltTy); - ExpandTypeFromArgs(CAExp->EltTy, LV, AI); - }); + forConstantArrayExpansion( + *this, CAExp, LV.getAddress(*this), [&](Address EltAddr) { + LValue LV = MakeAddrLValue(EltAddr, CAExp->EltTy); + ExpandTypeFromArgs(CAExp->EltTy, LV, AI); + }); } else if (auto RExp = dyn_cast(Exp.get())) { - Address This = LV.getAddress(); + Address This = LV.getAddress(*this); for (const CXXBaseSpecifier *BS : RExp->Bases) { // Perform a single step derived-to-base conversion. Address Base = @@ -1047,8 +1047,13 @@ void CodeGenFunction::ExpandTypeFromArgs( auto imagValue = *AI++; EmitStoreOfComplex(ComplexPairTy(realValue, imagValue), LV, /*init*/ true); } else { + // Call EmitStoreOfScalar except when the lvalue is a bitfield to emit a + // primitive store. assert(isa(Exp.get())); - EmitStoreThroughLValue(RValue::get(*AI++), LV); + if (LV.isBitField()) + EmitStoreThroughLValue(RValue::get(*AI++), LV); + else + EmitStoreOfScalar(*AI++, LV); } } @@ -1057,7 +1062,7 @@ void CodeGenFunction::ExpandTypeToArgs( SmallVectorImpl &IRCallArgs, unsigned &IRCallArgPos) { auto Exp = getTypeExpansion(Ty, getContext()); if (auto CAExp = dyn_cast(Exp.get())) { - Address Addr = Arg.hasLValue() ? Arg.getKnownLValue().getAddress() + Address Addr = Arg.hasLValue() ? Arg.getKnownLValue().getAddress(*this) : Arg.getKnownRValue().getAggregateAddress(); forConstantArrayExpansion( *this, CAExp, Addr, [&](Address EltAddr) { @@ -1068,7 +1073,7 @@ void CodeGenFunction::ExpandTypeToArgs( IRCallArgPos); }); } else if (auto RExp = dyn_cast(Exp.get())) { - Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress() + Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress(*this) : Arg.getKnownRValue().getAggregateAddress(); for (const CXXBaseSpecifier *BS : RExp->Bases) { // Perform a single step derived-to-base conversion. @@ -3141,7 +3146,7 @@ static bool isProvablyNull(llvm::Value *addr) { static void emitWriteback(CodeGenFunction &CGF, const CallArgList::Writeback &writeback) { const LValue &srcLV = writeback.Source; - Address srcAddr = srcLV.getAddress(); + Address srcAddr = srcLV.getAddress(CGF); assert(!isProvablyNull(srcAddr.getPointer()) && "shouldn't have writeback for provably null argument"); @@ -3249,7 +3254,7 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, CRE->getSubExpr()->getType()->castAs()->getPointeeType(); srcLV = CGF.MakeAddrLValue(srcAddr, srcAddrType); } - Address srcAddr = srcLV.getAddress(); + Address srcAddr = srcLV.getAddress(CGF); // The dest and src types don't necessarily match in LLVM terms // because of the crazy ObjC compatibility rules. 
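A related simplification sits in the CGBuiltin.cpp tables above: vqadd/vqsub (and the saturating halves of vqdmlal/vqdmlsl) no longer lower to ARM-specific intrinsics but to the target-independent llvm.uadd.sat/llvm.sadd.sat/llvm.usub.sat/llvm.ssub.sat family. For reference, the per-lane semantics of a signed saturating add, written out as a scalar sketch:

    #include <cstdint>
    #include <limits>

    // Reference semantics of llvm.sadd.sat.i32: the sum clamps at the
    // type's extremes instead of wrapping on overflow.
    int32_t sadd_sat(int32_t a, int32_t b) {
      int64_t wide = static_cast<int64_t>(a) + b; // cannot overflow in 64 bits
      if (wide > std::numeric_limits<int32_t>::max())
        return std::numeric_limits<int32_t>::max();
      if (wide < std::numeric_limits<int32_t>::min())
        return std::numeric_limits<int32_t>::min();
      return static_cast<int32_t>(wide);
    }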
@@ -3563,7 +3568,7 @@ RValue CallArg::getRValue(CodeGenFunction &CGF) const { CGF.EmitAggregateCopy(Copy, LV, Ty, AggValueSlot::DoesNotOverlap, LV.isVolatile()); IsUsed = true; - return RValue::getAggregate(Copy.getAddress()); + return RValue::getAggregate(Copy.getAddress(CGF)); } void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const { @@ -3573,7 +3578,7 @@ void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const { else if (!HasLV && RV.isComplex()) CGF.EmitStoreOfComplex(RV.getComplexVal(), Dst, /*init=*/true); else { - auto Addr = HasLV ? LV.getAddress() : RV.getAggregateAddress(); + auto Addr = HasLV ? LV.getAddress(CGF) : RV.getAggregateAddress(); LValue SrcLV = CGF.MakeAddrLValue(Addr, Ty); // We assume that call args are never copied into subobjects. CGF.EmitAggregateCopy(Dst, SrcLV, Ty, AggValueSlot::DoesNotOverlap, @@ -3936,7 +3941,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (I->isAggregate()) { // Replace the placeholder with the appropriate argument slot GEP. Address Addr = I->hasLValue() - ? I->getKnownLValue().getAddress() + ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress(); llvm::Instruction *Placeholder = cast(Addr.getPointer()); @@ -3981,7 +3986,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // 3. If the argument is byval, but RV is not located in default // or alloca address space. Address Addr = I->hasLValue() - ? I->getKnownLValue().getAddress() + ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress(); llvm::Value *V = Addr.getPointer(); CharUnits Align = ArgInfo.getIndirectAlign(); @@ -4068,7 +4073,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, V = I->getKnownRValue().getScalarVal(); else V = Builder.CreateLoad( - I->hasLValue() ? I->getKnownLValue().getAddress() + I->hasLValue() ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress()); // Implement swifterror by copying into a new swifterror argument. @@ -4122,7 +4127,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Src = CreateMemTemp(I->Ty, "coerce"); I->copyInto(*this, Src); } else { - Src = I->hasLValue() ? I->getKnownLValue().getAddress() + Src = I->hasLValue() ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress(); } @@ -4177,7 +4182,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Address addr = Address::invalid(); Address AllocaAddr = Address::invalid(); if (I->isAggregate()) { - addr = I->hasLValue() ? I->getKnownLValue().getAddress() + addr = I->hasLValue() ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress(); } else { @@ -4359,6 +4364,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Callee.getAbstractInfo(), Attrs, CallingConv, /*AttrOnCallSite=*/true); + if (const FunctionDecl *FD = dyn_cast_or_null(CurFuncDecl)) + if (FD->usesFPIntrin()) + // All calls within a strictfp function are marked strictfp + Attrs = + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::StrictFP); + // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. 
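The block just added (and repeated for the second emission path in the next hunk) makes strictfp transitive: every call emitted inside a function that uses floating-point intrinsics carries the StrictFP attribute, so LLVM cannot move or fold it across changes to the FP environment. At the source level this corresponds roughly to the following; clang's FENV_ACCESS support was still landing at the time, so treat the pragma as illustrative:

    #include <cmath>

    // With the FP environment declared live, the sqrt call site below is
    // emitted with the strictfp attribute in the resulting IR.
    #pragma STDC FENV_ACCESS ON

    double scaled_root(double x) {
      return std::sqrt(x) * 2.0;
    }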
@@ -4408,6 +4420,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, SmallVector BundleList = getBundlesForFunclet(CalleePtr); + if (const FunctionDecl *FD = dyn_cast_or_null(CurFuncDecl)) + if (FD->usesFPIntrin()) + // All calls within a strictfp function are marked strictfp + Attrs = + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::StrictFP); + // Emit the actual call/invoke instruction. llvm::CallBase *CI; if (!InvokeDest) { diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index bcc58dcbc2c6c..ebd70302ba4cc 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -657,7 +657,7 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, // the constructor. QualType::DestructionKind dtorKind = FieldType.isDestructedType(); if (CGF.needsEHCleanup(dtorKind)) - CGF.pushEHDestroy(dtorKind, LHS.getAddress(), FieldType); + CGF.pushEHDestroy(dtorKind, LHS.getAddress(CGF), FieldType); return; } } @@ -681,16 +681,12 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS, EmitComplexExprIntoLValue(Init, LHS, /*isInit*/ true); break; case TEK_Aggregate: { - AggValueSlot Slot = - AggValueSlot::forLValue( - LHS, - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - getOverlapForFieldInit(Field), - AggValueSlot::IsNotZeroed, - // Checks are made by the code that calls constructor. - AggValueSlot::IsSanitizerChecked); + AggValueSlot Slot = AggValueSlot::forLValue( + LHS, *this, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + getOverlapForFieldInit(Field), AggValueSlot::IsNotZeroed, + // Checks are made by the code that calls constructor. + AggValueSlot::IsSanitizerChecked); EmitAggExpr(Init, Slot); break; } @@ -700,7 +696,7 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS, // later in the constructor. QualType::DestructionKind dtorKind = FieldType.isDestructedType(); if (needsEHCleanup(dtorKind)) - pushEHDestroy(dtorKind, LHS.getAddress(), FieldType); + pushEHDestroy(dtorKind, LHS.getAddress(*this), FieldType); } /// Checks whether the given constructor is a valid subject for the @@ -914,6 +910,8 @@ namespace { } void addMemcpyableField(FieldDecl *F) { + if (F->isZeroSize(CGF.getContext())) + return; if (!FirstField) addInitialField(F); else @@ -961,9 +959,10 @@ namespace { LValue SrcLV = CGF.MakeNaturalAlignAddrLValue(SrcPtr, RecordTy); LValue Src = CGF.EmitLValueForFieldInitialization(SrcLV, FirstField); - emitMemcpyIR(Dest.isBitField() ? Dest.getBitFieldAddress() : Dest.getAddress(), - Src.isBitField() ? Src.getBitFieldAddress() : Src.getAddress(), - MemcpySize); + emitMemcpyIR( + Dest.isBitField() ? Dest.getBitFieldAddress() : Dest.getAddress(CGF), + Src.isBitField() ? 
Src.getBitFieldAddress() : Src.getAddress(CGF), + MemcpySize); reset(); } @@ -1117,7 +1116,7 @@ namespace { continue; LValue FieldLHS = LHS; EmitLValueForAnyFieldInitialization(CGF, MemberInit, FieldLHS); - CGF.pushEHDestroy(dtorKind, FieldLHS.getAddress(), FieldType); + CGF.pushEHDestroy(dtorKind, FieldLHS.getAddress(CGF), FieldType); } } @@ -1627,7 +1626,7 @@ namespace { LValue LV = CGF.EmitLValueForField(ThisLV, field); assert(LV.isSimple()); - CGF.emitDestroy(LV.getAddress(), field->getType(), destroyer, + CGF.emitDestroy(LV.getAddress(CGF), field->getType(), destroyer, flags.isForNormalCleanup() && useEHCleanupForArray); } }; diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index db5893a7b51f2..8d6406c027738 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1141,10 +1141,11 @@ llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, // declared. SourceLocation Loc = Ty->getDecl()->getLocation(); + uint32_t Align = getDeclAlignIfRequired(Ty->getDecl(), CGM.getContext()); // Typedefs are derived from some other type. return DBuilder.createTypedef(Underlying, Ty->getDecl()->getName(), getOrCreateFile(Loc), getLineNumber(Loc), - getDeclContextDescriptor(Ty->getDecl())); + getDeclContextDescriptor(Ty->getDecl()), Align); } static unsigned getDwarfCC(CallingConv CC) { diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 13e9c7a38fccd..8e74f7e019655 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -83,7 +83,8 @@ class CGDebugInfo { /// Cache of previously constructed Types. llvm::DenseMap TypeCache; - llvm::SmallDenseMap DebugPrefixMap; + std::map> + DebugPrefixMap; /// Cache that maps VLA types to size expressions for that type, /// represented by instantiated Metadata nodes. diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index a984f67f61768..56ddc983dafcd 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -597,7 +597,7 @@ namespace { Var.getType(), VK_LValue, SourceLocation()); // Compute the address of the local variable, in case it's a byref // or something. - llvm::Value *Addr = CGF.EmitDeclRefLValue(&DRE).getPointer(); + llvm::Value *Addr = CGF.EmitDeclRefLValue(&DRE).getPointer(CGF); // In some cases, the type of the function argument will be different from // the type of the pointer. An example of this is @@ -712,18 +712,18 @@ static bool tryEmitARCCopyWeakInit(CodeGenFunction &CGF, LValue srcLV = CGF.EmitLValue(srcExpr); // Handle a formal type change to avoid asserting. - auto srcAddr = srcLV.getAddress(); + auto srcAddr = srcLV.getAddress(CGF); if (needsCast) { - srcAddr = CGF.Builder.CreateElementBitCast(srcAddr, - destLV.getAddress().getElementType()); + srcAddr = CGF.Builder.CreateElementBitCast( + srcAddr, destLV.getAddress(CGF).getElementType()); } // If it was an l-value, use objc_copyWeak. 
if (srcExpr->getValueKind() == VK_LValue) { - CGF.EmitARCCopyWeak(destLV.getAddress(), srcAddr); + CGF.EmitARCCopyWeak(destLV.getAddress(CGF), srcAddr); } else { assert(srcExpr->getValueKind() == VK_XValue); - CGF.EmitARCMoveWeak(destLV.getAddress(), srcAddr); + CGF.EmitARCMoveWeak(destLV.getAddress(CGF), srcAddr); } return true; } @@ -741,7 +741,7 @@ static bool tryEmitARCCopyWeakInit(CodeGenFunction &CGF, static void drillIntoBlockVariable(CodeGenFunction &CGF, LValue &lvalue, const VarDecl *var) { - lvalue.setAddress(CGF.emitBlockByrefAddress(lvalue.getAddress(), var)); + lvalue.setAddress(CGF.emitBlockByrefAddress(lvalue.getAddress(CGF), var)); } void CodeGenFunction::EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, @@ -801,17 +801,18 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, if (capturedByInit) { // We can use a simple GEP for this because it can't have been // moved yet. - tempLV.setAddress(emitBlockByrefAddress(tempLV.getAddress(), + tempLV.setAddress(emitBlockByrefAddress(tempLV.getAddress(*this), cast(D), /*follow*/ false)); } - auto ty = cast(tempLV.getAddress().getElementType()); + auto ty = + cast(tempLV.getAddress(*this).getElementType()); llvm::Value *zero = CGM.getNullPointer(ty, tempLV.getType()); // If __weak, we want to use a barrier under certain conditions. if (lifetime == Qualifiers::OCL_Weak) - EmitARCInitWeak(tempLV.getAddress(), zero); + EmitARCInitWeak(tempLV.getAddress(*this), zero); // Otherwise just do a simple store. else @@ -854,9 +855,9 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast(D)); if (accessedByInit) - EmitARCStoreWeak(lvalue.getAddress(), value, /*ignored*/ true); + EmitARCStoreWeak(lvalue.getAddress(*this), value, /*ignored*/ true); else - EmitARCInitWeak(lvalue.getAddress(), value); + EmitARCInitWeak(lvalue.getAddress(*this), value); return; } @@ -1940,11 +1941,10 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D, else if (auto *FD = dyn_cast(D)) Overlap = getOverlapForFieldInit(FD); // TODO: how can we delay here if D is captured by its initializer? - EmitAggExpr(init, AggValueSlot::forLValue(lvalue, - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - Overlap)); + EmitAggExpr(init, AggValueSlot::forLValue( + lvalue, *this, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, Overlap)); } return; } @@ -2500,7 +2500,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // objc_storeStrong attempts to release its old value. 
llvm::Value *Null = CGM.EmitNullConstant(D.getType()); EmitStoreOfScalar(Null, lv, /* isInitialization */ true); - EmitARCStoreStrongCall(lv.getAddress(), ArgVal, true); + EmitARCStoreStrongCall(lv.getAddress(*this), ArgVal, true); DoStore = false; } else diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index 03351dbe0672e..d54dd87e3c00c 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -56,10 +56,11 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, CGF.EmitComplexExprIntoLValue(Init, lv, /*isInit*/ true); return; case TEK_Aggregate: - CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv,AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - AggValueSlot::DoesNotOverlap)); + CGF.EmitAggExpr(Init, + AggValueSlot::forLValue(lv, CGF, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); return; } llvm_unreachable("bad evaluation kind"); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 6becd35976d85..35009a1c285c8 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -573,7 +573,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { LV = EmitLValueForField(LV, Adjustment.Field); assert(LV.isSimple() && "materialized temporary field is not a simple lvalue"); - Object = LV.getAddress(); + Object = LV.getAddress(*this); break; } @@ -594,7 +594,7 @@ CodeGenFunction::EmitReferenceBindingToExpr(const Expr *E) { // Emit the expression as an lvalue. LValue LV = EmitLValue(E); assert(LV.isSimple()); - llvm::Value *Value = LV.getPointer(); + llvm::Value *Value = LV.getPointer(*this); if (sanitizePerformTypeCheck() && !E->getType()->isFunctionType()) { // C++11 [dcl.ref]p5 (as amended by core issue 453): @@ -1127,7 +1127,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, LValue LV = EmitLValue(UO->getSubExpr()); if (BaseInfo) *BaseInfo = LV.getBaseInfo(); if (TBAAInfo) *TBAAInfo = LV.getTBAAInfo(); - return LV.getAddress(); + return LV.getAddress(*this); } } @@ -1217,8 +1217,8 @@ LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) { if (IsBaseCXXThis || isa(ME->getBase())) SkippedChecks.set(SanitizerKind::Null, true); } - EmitTypeCheck(TCK, E->getExprLoc(), LV.getPointer(), - E->getType(), LV.getAlignment(), SkippedChecks); + EmitTypeCheck(TCK, E->getExprLoc(), LV.getPointer(*this), E->getType(), + LV.getAlignment(), SkippedChecks); } return LV; } @@ -1305,7 +1305,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { if (LV.isSimple()) { // Defend against branches out of gnu statement expressions surrounded by // cleanups. 
- llvm::Value *V = LV.getPointer(); + llvm::Value *V = LV.getPointer(*this); Scope.ForceCleanup({&V}); return LValue::MakeAddr(Address(V, LV.getAlignment()), LV.getType(), getContext(), LV.getBaseInfo(), LV.getTBAAInfo()); @@ -1521,7 +1521,7 @@ llvm::Value *CodeGenFunction::emitScalarConstant( llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue, SourceLocation Loc) { - return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), + return EmitLoadOfScalar(lvalue.getAddress(*this), lvalue.isVolatile(), lvalue.getType(), Loc, lvalue.getBaseInfo(), lvalue.getTBAAInfo(), lvalue.isNontemporal()); } @@ -1771,7 +1771,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, bool isInit) { - EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), + EmitStoreOfScalar(value, lvalue.getAddress(*this), lvalue.isVolatile(), lvalue.getType(), lvalue.getBaseInfo(), lvalue.getTBAAInfo(), isInit, lvalue.isNontemporal()); } @@ -1782,18 +1782,18 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) { if (LV.isObjCWeak()) { // load of a __weak object. - Address AddrWeakObj = LV.getAddress(); + Address AddrWeakObj = LV.getAddress(*this); return RValue::get(CGM.getObjCRuntime().EmitObjCWeakRead(*this, AddrWeakObj)); } if (LV.getQuals().getObjCLifetime() == Qualifiers::OCL_Weak) { // In MRC mode, we do a load+autorelease. if (!getLangOpts().ObjCAutoRefCount) { - return RValue::get(EmitARCLoadWeak(LV.getAddress())); + return RValue::get(EmitARCLoadWeak(LV.getAddress(*this))); } // In ARC mode, we load retained and then consume the value. - llvm::Value *Object = EmitARCLoadWeakRetained(LV.getAddress()); + llvm::Value *Object = EmitARCLoadWeakRetained(LV.getAddress(*this)); Object = EmitObjCConsumeObject(LV.getType(), Object); return RValue::get(Object); } @@ -1979,9 +1979,10 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, case Qualifiers::OCL_Weak: if (isInit) // Initialize and then skip the primitive store. - EmitARCInitWeak(Dst.getAddress(), Src.getScalarVal()); + EmitARCInitWeak(Dst.getAddress(*this), Src.getScalarVal()); else - EmitARCStoreWeak(Dst.getAddress(), Src.getScalarVal(), /*ignore*/ true); + EmitARCStoreWeak(Dst.getAddress(*this), Src.getScalarVal(), + /*ignore*/ true); return; case Qualifiers::OCL_Autoreleasing: @@ -1994,7 +1995,7 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, if (Dst.isObjCWeak() && !Dst.isNonGC()) { // load of a __weak object. - Address LvalueDst = Dst.getAddress(); + Address LvalueDst = Dst.getAddress(*this); llvm::Value *src = Src.getScalarVal(); CGM.getObjCRuntime().EmitObjCWeakAssign(*this, src, LvalueDst); return; @@ -2002,7 +2003,7 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, if (Dst.isObjCStrong() && !Dst.isNonGC()) { // load of a __strong object. 
- Address LvalueDst = Dst.getAddress(); + Address LvalueDst = Dst.getAddress(*this); llvm::Value *src = Src.getScalarVal(); if (Dst.isObjCIvar()) { assert(Dst.getBaseIvarExp() && "BaseIvarExp is NULL"); @@ -2328,8 +2329,8 @@ Address CodeGenFunction::EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo, TBAAAccessInfo *PointeeTBAAInfo) { - llvm::LoadInst *Load = Builder.CreateLoad(RefLVal.getAddress(), - RefLVal.isVolatile()); + llvm::LoadInst *Load = + Builder.CreateLoad(RefLVal.getAddress(*this), RefLVal.isVolatile()); CGM.DecorateInstructionWithTBAA(Load, RefLVal.getTBAAInfo()); CharUnits Align = getNaturalTypeAlignment(RefLVal.getType()->getPointeeType(), @@ -2585,7 +2586,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD), CapturedStmtInfo->getContextValue()); return MakeAddrLValue( - Address(CapLVal.getPointer(), getContext().getDeclAlign(VD)), + Address(CapLVal.getPointer(*this), getContext().getDeclAlign(VD)), CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl), CapLVal.getTBAAInfo()); } @@ -2720,7 +2721,7 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { // __real is valid on scalars. This is a faster way of testing that. // __imag can only produce an rvalue on scalars. if (E->getOpcode() == UO_Real && - !LV.getAddress().getElementType()->isStructTy()) { + !LV.getAddress(*this).getElementType()->isStructTy()) { assert(E->getSubExpr()->getType()->isArithmeticType()); return LV; } @@ -2728,9 +2729,9 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { QualType T = ExprTy->castAs()->getElementType(); Address Component = - (E->getOpcode() == UO_Real - ? emitAddrOfRealComponent(LV.getAddress(), LV.getType()) - : emitAddrOfImagComponent(LV.getAddress(), LV.getType())); + (E->getOpcode() == UO_Real + ? emitAddrOfRealComponent(LV.getAddress(*this), LV.getType()) + : emitAddrOfImagComponent(LV.getAddress(*this), LV.getType())); LValue ElemLV = MakeAddrLValue(Component, T, LV.getBaseInfo(), CGM.getTBAAInfoForSubobject(LV, T)); ElemLV.getQuals().addQualifiers(LV.getQuals()); @@ -3330,7 +3331,7 @@ Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, // Expressions of array type can't be bitfields or vector elements. LValue LV = EmitLValue(E); - Address Addr = LV.getAddress(); + Address Addr = LV.getAddress(*this); // If the array type was an incomplete type, we need to make sure // the decay ends up being the right type. @@ -3545,8 +3546,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, LValue LHS = EmitLValue(E->getBase()); auto *Idx = EmitIdxAfterBase(/*Promote*/false); assert(LHS.isSimple() && "Can only subscript lvalue vectors here!"); - return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(), - LHS.getBaseInfo(), TBAAAccessInfo()); + return LValue::MakeVectorElt(LHS.getAddress(*this), Idx, + E->getBase()->getType(), LHS.getBaseInfo(), + TBAAAccessInfo()); } // All the other cases basically behave like simple offsetting. @@ -3647,7 +3649,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // Propagate the alignment from the array itself to the result. 
QualType arrayType = Array->getType(); Addr = emitArraySubscriptGEP( - *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, + *this, ArrayLV.getAddress(*this), {CGM.getSize(CharUnits::Zero()), Idx}, E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, E->getExprLoc(), &arrayType, E->getBase(), "arrayidx", ArrayDecl); EltBaseInfo = ArrayLV.getBaseInfo(); @@ -3682,7 +3684,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, if (auto *ASE = dyn_cast(Base->IgnoreParenImpCasts())) { BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound); if (BaseTy->isArrayType()) { - Address Addr = BaseLVal.getAddress(); + Address Addr = BaseLVal.getAddress(CGF); BaseInfo = BaseLVal.getBaseInfo(); // If the array type was an incomplete type, we need to make sure @@ -3707,7 +3709,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, &TypeTBAAInfo); BaseInfo.mergeForCast(TypeBaseInfo); TBAAInfo = CGF.CGM.mergeTBAAInfoForCast(TBAAInfo, TypeTBAAInfo); - return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align); + return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress(CGF)), Align); } return CGF.EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo); } @@ -3848,7 +3850,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, // Propagate the alignment from the array itself to the result. EltPtr = emitArraySubscriptGEP( - *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, + *this, ArrayLV.getAddress(*this), {CGM.getSize(CharUnits::Zero()), Idx}, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), /*signedIndices=*/false, E->getExprLoc()); BaseInfo = ArrayLV.getBaseInfo(); @@ -3908,7 +3910,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { if (Base.isSimple()) { llvm::Constant *CV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); - return LValue::MakeExtVectorElt(Base.getAddress(), CV, type, + return LValue::MakeExtVectorElt(Base.getAddress(*this), CV, type, Base.getBaseInfo(), TBAAAccessInfo()); } assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!"); @@ -4059,7 +4061,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(field->getParent()); const CGBitFieldInfo &Info = RL.getBitFieldInfo(field); - Address Addr = base.getAddress(); + Address Addr = base.getAddress(*this); unsigned Idx = RL.getLLVMFieldNo(field); const RecordDecl *rec = field->getParent(); if (!IsInPreservedAIRegion && @@ -4127,7 +4129,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, getContext().getTypeSizeInChars(FieldType).getQuantity(); } - Address addr = base.getAddress(); + Address addr = base.getAddress(*this); if (auto *ClassDef = dyn_cast(rec)) { if (CGM.getCodeGenOpts().StrictVTablePointers && ClassDef->isDynamicClass()) { @@ -4223,7 +4225,7 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue Base, if (!FieldType->isReferenceType()) return EmitLValueForField(Base, Field); - Address V = emitAddrOfFieldStorage(*this, Base.getAddress(), Field); + Address V = emitAddrOfFieldStorage(*this, Base.getAddress(*this), Field); // Make sure that the address is pointing to the right type. 
llvm::Type *llvmType = ConvertTypeForMem(FieldType); @@ -4341,8 +4343,8 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { EmitBlock(contBlock); if (lhs && rhs) { - llvm::Value *lhsPtr = lhs->getPointer(); - llvm::Value *rhsPtr = rhs->getPointer(); + llvm::Value *lhsPtr = lhs->getPointer(*this); + llvm::Value *rhsPtr = rhs->getPointer(*this); if (rhsPtr->getType() != lhsPtr->getType()) { if (!getLangOpts().SYCLIsDevice) llvm_unreachable( @@ -4372,8 +4374,8 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { phi->addIncoming(rhsPtr, rhsBlock); Address result(phi, std::min(lhs->getAlignment(), rhs->getAlignment())); AlignmentSource alignSource = - std::max(lhs->getBaseInfo().getAlignmentSource(), - rhs->getBaseInfo().getAlignmentSource()); + std::max(lhs->getBaseInfo().getAlignmentSource(), + rhs->getBaseInfo().getAlignmentSource()); TBAAAccessInfo TBAAInfo = CGM.mergeTBAAInfoForConditionalOperator( lhs->getTBAAInfo(), rhs->getTBAAInfo()); return MakeAddrLValue(result, expr->getType(), LValueBaseInfo(alignSource), @@ -4452,7 +4454,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_Dynamic: { LValue LV = EmitLValue(E->getSubExpr()); - Address V = LV.getAddress(); + Address V = LV.getAddress(*this); const auto *DCE = cast(E); return MakeNaturalAlignAddrLValue(EmitDynamicCast(V, DCE), E->getType()); } @@ -4472,7 +4474,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { auto *DerivedClassDecl = cast(DerivedClassTy->getDecl()); LValue LV = EmitLValue(E->getSubExpr()); - Address This = LV.getAddress(); + Address This = LV.getAddress(*this); // Perform the derived-to-base conversion Address Base = GetAddressOfBaseClass( @@ -4494,10 +4496,9 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { LValue LV = EmitLValue(E->getSubExpr()); // Perform the base-to-derived conversion - Address Derived = - GetAddressOfDerivedClass(LV.getAddress(), DerivedClassDecl, - E->path_begin(), E->path_end(), - /*NullCheckValue=*/false); + Address Derived = GetAddressOfDerivedClass( + LV.getAddress(*this), DerivedClassDecl, E->path_begin(), E->path_end(), + /*NullCheckValue=*/false); // C++11 [expr.static.cast]p2: Behavior is undefined if a downcast is // performed and the object is not of the derived type. 
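The [expr.static.cast]p2 comment above deserves a concrete illustration. The downcast below is well-formed, yet undefined at run time because the object is not actually of the derived type; this is the case the emitted type check (under -fsanitize=vptr) is meant to catch. Illustrative types only:

    struct Base { virtual ~Base() = default; };
    struct Derived : Base { int Extra = 0; };

    void demo() {
      Base B;
      // Compiles fine, but undefined behavior: B is not a Derived.
      Derived &D = static_cast<Derived &>(B);
      (void)D;
    }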
@@ -4519,7 +4520,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { CGM.EmitExplicitCastExprType(CE, this); LValue LV = EmitLValue(E->getSubExpr()); - Address V = Builder.CreateBitCast(LV.getAddress(), + Address V = Builder.CreateBitCast(LV.getAddress(*this), ConvertType(CE->getTypeAsWritten())); if (SanOpts.has(SanitizerKind::CFIUnrelatedCast)) @@ -4534,14 +4535,15 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { LValue LV = EmitLValue(E->getSubExpr()); QualType DestTy = getContext().getPointerType(E->getType()); llvm::Value *V = getTargetHooks().performAddrSpaceCast( - *this, LV.getPointer(), E->getSubExpr()->getType().getAddressSpace(), + *this, LV.getPointer(*this), + E->getSubExpr()->getType().getAddressSpace(), E->getType().getAddressSpace(), ConvertType(DestTy)); - return MakeAddrLValue(Address(V, LV.getAddress().getAlignment()), + return MakeAddrLValue(Address(V, LV.getAddress(*this).getAlignment()), E->getType(), LV.getBaseInfo(), LV.getTBAAInfo()); } case CK_ObjCObjectLValueCast: { LValue LV = EmitLValue(E->getSubExpr()); - Address V = Builder.CreateElementBitCast(LV.getAddress(), + Address V = Builder.CreateElementBitCast(LV.getAddress(*this), ConvertType(E->getType())); return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(), CGM.getTBAAInfoForSubobject(LV, E->getType())); @@ -4595,13 +4597,17 @@ RValue CodeGenFunction::EmitRValueForField(LValue LV, case TEK_Complex: return RValue::getComplex(EmitLoadOfComplex(FieldLV, Loc)); case TEK_Aggregate: - return FieldLV.asAggregateRValue(); + return FieldLV.asAggregateRValue(*this); case TEK_Scalar: // This routine is used to load fields one-by-one to perform a copy, so // don't load reference fields. if (FD->getType()->isReferenceType()) - return RValue::get(FieldLV.getPointer()); - return EmitLoadOfLValue(FieldLV, Loc); + return RValue::get(FieldLV.getPointer(*this)); + // Call EmitLoadOfScalar except when the lvalue is a bitfield to emit a + // primitive load. 
+ if (FieldLV.isBitField()) + return EmitLoadOfLValue(FieldLV, Loc); + return RValue::get(EmitLoadOfScalar(FieldLV, Loc)); } llvm_unreachable("bad evaluation kind"); } @@ -4695,7 +4701,7 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) { functionType = ptrType->getPointeeType(); } else { functionType = E->getType(); - calleePtr = EmitLValue(E).getPointer(); + calleePtr = EmitLValue(E).getPointer(*this); } assert(functionType->isFunctionType()); @@ -4855,7 +4861,7 @@ LValue CodeGenFunction::EmitObjCIvarRefLValue(const ObjCIvarRefExpr *E) { BaseQuals = ObjectTy.getQualifiers(); } else { LValue BaseLV = EmitLValue(BaseExpr); - BaseValue = BaseLV.getPointer(); + BaseValue = BaseLV.getPointer(*this); ObjectTy = BaseExpr->getType(); BaseQuals = ObjectTy.getQualifiers(); } @@ -5065,7 +5071,7 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) { if (E->getOpcode() == BO_PtrMemI) { BaseAddr = EmitPointerWithAlignment(E->getLHS()); } else { - BaseAddr = EmitLValue(E->getLHS()).getAddress(); + BaseAddr = EmitLValue(E->getLHS()).getAddress(*this); } llvm::Value *OffsetV = EmitScalarExpr(E->getRHS()); @@ -5092,7 +5098,7 @@ RValue CodeGenFunction::convertTempToRValue(Address addr, case TEK_Complex: return RValue::getComplex(EmitLoadOfComplex(lvalue, loc)); case TEK_Aggregate: - return lvalue.asAggregateRValue(); + return lvalue.asAggregateRValue(*this); case TEK_Scalar: return RValue::get(EmitLoadOfScalar(lvalue, loc)); } diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 7e69f63fe1354..41a9329386559 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -345,10 +345,9 @@ void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src, } } - AggValueSlot srcAgg = - AggValueSlot::forLValue(src, AggValueSlot::IsDestructed, - needsGC(type), AggValueSlot::IsAliased, - AggValueSlot::MayOverlap); + AggValueSlot srcAgg = AggValueSlot::forLValue( + src, CGF, AggValueSlot::IsDestructed, needsGC(type), + AggValueSlot::IsAliased, AggValueSlot::MayOverlap); EmitCopy(type, Dest, srcAgg); } @@ -386,7 +385,7 @@ AggExprEmitter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) { ASTContext &Ctx = CGF.getContext(); LValue Array = CGF.EmitLValue(E->getSubExpr()); assert(Array.isSimple() && "initializer_list array not a simple lvalue"); - Address ArrayPtr = Array.getAddress(); + Address ArrayPtr = Array.getAddress(CGF); const ConstantArrayType *ArrayType = Ctx.getAsConstantArrayType(E->getSubExpr()->getType()); @@ -493,7 +492,7 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, if (NumInitElements * elementSize.getQuantity() > 16 && elementType.isTriviallyCopyableType(CGF.getContext())) { CodeGen::CodeGenModule &CGM = CGF.CGM; - ConstantEmitter Emitter(CGM); + ConstantEmitter Emitter(CGF); LangAS AS = ArrayQTy.getAddressSpace(); if (llvm::Constant *C = Emitter.tryEmitForInitializer(E, AS, ArrayQTy)) { auto GV = new llvm::GlobalVariable( @@ -688,7 +687,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { CodeGenFunction::TCK_Load); // FIXME: Do we also need to handle property references here? 
if (LV.isSimple()) - CGF.EmitDynamicCast(LV.getAddress(), cast(E)); + CGF.EmitDynamicCast(LV.getAddress(CGF), cast(E)); else CGF.CGM.ErrorUnsupported(E, "non-simple lvalue dynamic_cast"); @@ -723,7 +722,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { LValue SourceLV = CGF.EmitLValue(E->getSubExpr()); Address SourceAddress = - Builder.CreateElementBitCast(SourceLV.getAddress(), CGF.Int8Ty); + Builder.CreateElementBitCast(SourceLV.getAddress(CGF), CGF.Int8Ty); Address DestAddress = Builder.CreateElementBitCast(Dest.getAddress(), CGF.Int8Ty); llvm::Value *SizeVal = llvm::ConstantInt::get( @@ -1163,7 +1162,7 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { } EmitCopy(E->getLHS()->getType(), - AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, + AggValueSlot::forLValue(LHS, CGF, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), AggValueSlot::IsAliased, AggValueSlot::MayOverlap), @@ -1184,11 +1183,9 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { } // Codegen the RHS so that it stores directly into the LHS. - AggValueSlot LHSSlot = - AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, - needsGC(E->getLHS()->getType()), - AggValueSlot::IsAliased, - AggValueSlot::MayOverlap); + AggValueSlot LHSSlot = AggValueSlot::forLValue( + LHS, CGF, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), + AggValueSlot::IsAliased, AggValueSlot::MayOverlap); // A non-volatile aggregate destination might have volatile member. if (!LHSSlot.isVolatile() && CGF.hasVolatileMember(E->getLHS()->getType())) @@ -1320,7 +1317,7 @@ AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { llvm::Constant::getNullValue(CGF.Int8PtrTy), CharUnits::One()); // placeholder - CGF.pushDestroy(EHCleanup, LV.getAddress(), CurField->getType(), + CGF.pushDestroy(EHCleanup, LV.getAddress(CGF), CurField->getType(), CGF.getDestroyer(DtorKind), false); Cleanups.push_back(CGF.EHStack.stable_begin()); } @@ -1408,12 +1405,11 @@ AggExprEmitter::EmitInitializationToLValue(Expr *E, LValue LV) { CGF.EmitComplexExprIntoLValue(E, LV, /*isInit*/ true); return; case TEK_Aggregate: - CGF.EmitAggExpr(E, AggValueSlot::forLValue(LV, - AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - AggValueSlot::MayOverlap, - Dest.isZeroed())); + CGF.EmitAggExpr( + E, AggValueSlot::forLValue(LV, CGF, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + AggValueSlot::MayOverlap, Dest.isZeroed())); return; case TEK_Scalar: if (LV.isSimple()) { @@ -1449,7 +1445,7 @@ void AggExprEmitter::EmitNullInitializationToLValue(LValue lv) { // There's a potential optimization opportunity in combining // memsets; that would be easy for arrays, but relatively // difficult for structures with the current code. - CGF.EmitNullInitialization(lv.getAddress(), lv.getType()); + CGF.EmitNullInitialization(lv.getAddress(CGF), lv.getType()); } } @@ -1606,7 +1602,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { = field->getType().isDestructedType()) { assert(LV.isSimple()); if (CGF.needsEHCleanup(dtorKind)) { - CGF.pushDestroy(EHCleanup, LV.getAddress(), field->getType(), + CGF.pushDestroy(EHCleanup, LV.getAddress(CGF), field->getType(), CGF.getDestroyer(dtorKind), false); addCleanup(CGF.EHStack.stable_begin()); pushedCleanup = true; @@ -1617,7 +1613,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // else, clean it up for -O0 builds and general tidiness. 
if (!pushedCleanup && LV.isSimple()) if (llvm::GetElementPtrInst *GEP = - dyn_cast(LV.getPointer())) + dyn_cast(LV.getPointer(CGF))) if (GEP->use_empty()) GEP->eraseFromParent(); } @@ -1699,9 +1695,8 @@ void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, if (InnerLoop) { // If the subexpression is an ArrayInitLoopExpr, share its cleanup. auto elementSlot = AggValueSlot::forLValue( - elementLV, AggValueSlot::IsDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, + elementLV, CGF, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap); AggExprEmitter(CGF, elementSlot, false) .VisitArrayInitLoopExpr(InnerLoop, outerBegin); @@ -1864,10 +1859,10 @@ LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { assert(hasAggregateEvaluationKind(E->getType()) && "Invalid argument!"); Address Temp = CreateMemTemp(E->getType()); LValue LV = MakeAddrLValue(Temp, E->getType()); - EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed, - AggValueSlot::DoesNotNeedGCBarriers, - AggValueSlot::IsNotAliased, - AggValueSlot::DoesNotOverlap)); + EmitAggExpr(E, AggValueSlot::forLValue( + LV, *this, AggValueSlot::IsNotDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap)); return LV; } @@ -1916,8 +1911,8 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty, bool isVolatile) { assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex"); - Address DestPtr = Dest.getAddress(); - Address SrcPtr = Src.getAddress(); + Address DestPtr = Dest.getAddress(*this); + Address SrcPtr = Src.getAddress(*this); if (getLangOpts().CPlusPlus) { if (const RecordType *RT = Ty->getAs()) { diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index 114d806d454bb..269b80b434032 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -133,7 +133,7 @@ RValue CodeGenFunction::EmitCXXPseudoDestructorExpr( BaseQuals = PTy->getPointeeType().getQualifiers(); } else { LValue BaseLV = EmitLValue(BaseExpr); - BaseValue = BaseLV.getAddress(); + BaseValue = BaseLV.getAddress(*this); QualType BaseTy = BaseExpr->getType(); BaseQuals = BaseTy.getQualifiers(); } @@ -271,11 +271,11 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( assert(ReturnValue.isNull() && "Constructor shouldn't have return value"); CallArgList Args; commonEmitCXXMemberOrOperatorCall( - *this, Ctor, This.getPointer(), /*ImplicitParam=*/nullptr, + *this, Ctor, This.getPointer(*this), /*ImplicitParam=*/nullptr, /*ImplicitParamTy=*/QualType(), CE, Args, nullptr); EmitCXXConstructorCall(Ctor, Ctor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, This.getAddress(), Args, + /*Delegating=*/false, This.getAddress(*this), Args, AggValueSlot::DoesNotOverlap, CE->getExprLoc(), /*NewPointerIsChecked=*/false); return RValue::get(nullptr); @@ -293,7 +293,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( (*(CE->arg_begin() + 1))->getType()) : EmitLValue(*CE->arg_begin()); EmitAggregateAssign(This, RHS, CE->getType()); - return RValue::get(This.getPointer()); + return RValue::get(This.getPointer(*this)); } llvm_unreachable("unknown trivial member function"); } @@ -328,7 +328,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (IsImplicitObjectCXXThis || isa(IOA)) SkippedChecks.set(SanitizerKind::Null, true); } - EmitTypeCheck(CodeGenFunction::TCK_MemberCall, 
CallLoc, This.getPointer(), + EmitTypeCheck(CodeGenFunction::TCK_MemberCall, CallLoc, + This.getPointer(*this), C.getRecordType(CalleeDecl->getParent()), /*Alignment=*/CharUnits::Zero(), SkippedChecks); @@ -345,9 +346,9 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( "Destructor shouldn't have explicit parameters"); assert(ReturnValue.isNull() && "Destructor shouldn't have return value"); if (UseVirtualCall) { - CGM.getCXXABI().EmitVirtualDestructorCall( - *this, Dtor, Dtor_Complete, This.getAddress(), - cast(CE)); + CGM.getCXXABI().EmitVirtualDestructorCall(*this, Dtor, Dtor_Complete, + This.getAddress(*this), + cast(CE)); } else { GlobalDecl GD(Dtor, Dtor_Complete); CGCallee Callee; @@ -362,7 +363,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( QualType ThisTy = IsArrow ? Base->getType()->getPointeeType() : Base->getType(); - EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, + EmitCXXDestructorCall(GD, Callee, This.getPointer(*this), ThisTy, /*ImplicitParam=*/nullptr, /*ImplicitParamTy=*/QualType(), nullptr); } @@ -374,15 +375,14 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( CGCallee Callee; if (UseVirtualCall) { - Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty); + Callee = CGCallee::forVirtual(CE, MD, This.getAddress(*this), Ty); } else { if (SanOpts.has(SanitizerKind::CFINVCall) && MD->getParent()->isDynamicClass()) { llvm::Value *VTable; const CXXRecordDecl *RD; - std::tie(VTable, RD) = - CGM.getCXXABI().LoadVTablePtr(*this, This.getAddress(), - CalleeDecl->getParent()); + std::tie(VTable, RD) = CGM.getCXXABI().LoadVTablePtr( + *this, This.getAddress(*this), CalleeDecl->getParent()); EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getBeginLoc()); } @@ -401,12 +401,12 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (MD->isVirtual()) { Address NewThisAddr = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall( - *this, CalleeDecl, This.getAddress(), UseVirtualCall); + *this, CalleeDecl, This.getAddress(*this), UseVirtualCall); This.setAddress(NewThisAddr); } return EmitCXXMemberOrOperatorCall( - CalleeDecl, Callee, ReturnValue, This.getPointer(), + CalleeDecl, Callee, ReturnValue, This.getPointer(*this), /*ImplicitParam=*/nullptr, QualType(), CE, RtlArgs); } @@ -428,7 +428,7 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, if (BO->getOpcode() == BO_PtrMemI) This = EmitPointerWithAlignment(BaseExpr); else - This = EmitLValue(BaseExpr).getAddress(); + This = EmitLValue(BaseExpr).getAddress(*this); EmitTypeCheck(TCK_MemberCall, E->getExprLoc(), This.getPointer(), QualType(MPT->getClass(), 0)); @@ -2103,7 +2103,7 @@ static bool isGLValueFromPointerDeref(const Expr *E) { static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E, llvm::Type *StdTypeInfoPtrTy) { // Get the vtable pointer. 
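The change repeated throughout these hunks is mechanical: LValue::getAddress()/getPointer() and AggValueSlot::forLValue now take the emitting CodeGenFunction, so CodeGenFunction members pass *this and helper emitters pass the CGF reference they hold. The patch itself does not state the motivation; the following is only a minimal standalone sketch of the calling-convention change, using stand-in types rather than clang's real ones:

    #include <cstdio>

    // Stand-ins for clang's types; only the threading pattern matters here.
    struct Address { void *Ptr = nullptr; };

    struct CodeGenFunction;

    struct LValue {
      Address Addr;
      // The accessor now requires the emitting function, so computing the
      // address can consult per-function state if it ever needs to.
      Address getAddress(CodeGenFunction &CGF) const;
    };

    struct CodeGenFunction {
      void emitSomething(const LValue &LV) {
        Address A = LV.getAddress(*this); // members pass *this ...
        std::printf("addr=%p\n", A.Ptr);
      }
    };

    Address LValue::getAddress(CodeGenFunction &) const { return Addr; }

    int main() {
      CodeGenFunction CGF;
      LValue LV;
      CGF.emitSomething(LV); // ... helpers holding a CGF pass that instead
      return 0;
    }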
- Address ThisPtr = CGF.EmitLValue(E).getAddress(); + Address ThisPtr = CGF.EmitLValue(E).getAddress(CGF); QualType SrcRecordTy = E->getType(); diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 385f87f12a9b3..6b11969771567 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -348,7 +348,7 @@ ComplexPairTy ComplexExprEmitter::EmitLoadOfLValue(LValue lvalue, if (lvalue.getType()->isAtomicType()) return CGF.EmitAtomicLoad(lvalue, loc).getComplexVal(); - Address SrcPtr = lvalue.getAddress(); + Address SrcPtr = lvalue.getAddress(CGF); bool isVolatile = lvalue.isVolatileQualified(); llvm::Value *Real = nullptr, *Imag = nullptr; @@ -374,7 +374,7 @@ void ComplexExprEmitter::EmitStoreOfComplex(ComplexPairTy Val, LValue lvalue, (!isInit && CGF.LValueIsSuitableForInlineAtomic(lvalue))) return CGF.EmitAtomicStore(RValue::getComplex(Val), lvalue, isInit); - Address Ptr = lvalue.getAddress(); + Address Ptr = lvalue.getAddress(CGF); Address RealPtr = CGF.emitAddrOfRealComponent(Ptr, lvalue.getType()); Address ImagPtr = CGF.emitAddrOfImagComponent(Ptr, lvalue.getType()); @@ -463,14 +463,14 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_LValueBitCast: { LValue origLV = CGF.EmitLValue(Op); - Address V = origLV.getAddress(); + Address V = origLV.getAddress(CGF); V = Builder.CreateElementBitCast(V, CGF.ConvertType(DestTy)); return EmitLoadOfLValue(CGF.MakeAddrLValue(V, DestTy), Op->getExprLoc()); } case CK_LValueToRValueBitCast: { LValue SourceLVal = CGF.EmitLValue(Op); - Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(), + Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(CGF), CGF.ConvertTypeForMem(DestTy)); LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 69df9e4103b14..84dbb55be3e1e 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -615,7 +615,7 @@ class ScalarExprEmitter if (isa(E->getType())) // never sugared return CGF.CGM.getMemberPointerConstant(E); - return EmitLValue(E->getSubExpr()).getPointer(); + return EmitLValue(E->getSubExpr()).getPointer(CGF); } Value *VisitUnaryDeref(const UnaryOperator *E) { if (E->getType()->isVoidType()) @@ -644,8 +644,8 @@ class ScalarExprEmitter auto &Ctx = CGF.getContext(); APValue Evaluated = SLE->EvaluateInContext(Ctx, CGF.CurSourceLocExprScope.getDefaultExpr()); - return ConstantEmitter(CGF.CGM, &CGF) - .emitAbstract(SLE->getLocation(), Evaluated, SLE->getType()); + return ConstantEmitter(CGF).emitAbstract(SLE->getLocation(), Evaluated, + SLE->getType()); } Value *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { @@ -976,6 +976,11 @@ EmitIntegerTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst, return std::make_pair(Kind, std::make_pair(Check, Mask)); } +static bool PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck( + QualType SrcType, QualType DstType) { + return SrcType->isIntegerType() && DstType->isIntegerType(); +} + void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, Value *Dst, QualType DstType, SourceLocation Loc) { @@ -984,7 +989,8 @@ void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, // We only care about int->int conversions here. // We ignore conversions to/from pointer and/or bool. 
- if (!(SrcType->isIntegerType() && DstType->isIntegerType())) + if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType, + DstType)) return; unsigned SrcBits = Src->getType()->getScalarSizeInBits(); @@ -1095,7 +1101,8 @@ void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, // We only care about int->int conversions here. // We ignore conversions to/from pointer and/or bool. - if (!(SrcType->isIntegerType() && DstType->isIntegerType())) + if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType, + DstType)) return; bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); @@ -1972,7 +1979,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_LValueBitCast: case CK_ObjCObjectLValueCast: { - Address Addr = EmitLValue(E).getAddress(); + Address Addr = EmitLValue(E).getAddress(CGF); Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy)); LValue LV = CGF.MakeAddrLValue(Addr, DestTy); return EmitLoadOfLValue(LV, CE->getExprLoc()); @@ -1980,7 +1987,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_LValueToRValueBitCast: { LValue SourceLVal = CGF.EmitLValue(E); - Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(), + Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(CGF), CGF.ConvertTypeForMem(DestTy)); LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); @@ -2121,7 +2128,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_ArrayToPointerDecay: return CGF.EmitArrayToPointerDecay(E).getPointer(); case CK_FunctionToPointerDecay: - return EmitLValue(E).getPointer(); + return EmitLValue(E).getPointer(CGF); case CK_NullToPointer: if (MustVisitNullValue(E)) @@ -2386,14 +2393,14 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (isInc && type->isBooleanType()) { llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type); if (isPre) { - Builder.CreateStore(True, LV.getAddress(), LV.isVolatileQualified()) - ->setAtomic(llvm::AtomicOrdering::SequentiallyConsistent); + Builder.CreateStore(True, LV.getAddress(CGF), LV.isVolatileQualified()) + ->setAtomic(llvm::AtomicOrdering::SequentiallyConsistent); return Builder.getTrue(); } // For atomic bool increment, we just store true and return it for // preincrement, do an atomic swap with true for postincrement return Builder.CreateAtomicRMW( - llvm::AtomicRMWInst::Xchg, LV.getPointer(), True, + llvm::AtomicRMWInst::Xchg, LV.getPointer(CGF), True, llvm::AtomicOrdering::SequentiallyConsistent); } // Special case for atomic increment / decrement on integers, emit @@ -2410,8 +2417,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Instruction::Sub; llvm::Value *amt = CGF.EmitToMemory( llvm::ConstantInt::get(ConvertType(type), 1, true), type); - llvm::Value *old = Builder.CreateAtomicRMW(aop, - LV.getPointer(), amt, llvm::AtomicOrdering::SequentiallyConsistent); + llvm::Value *old = + Builder.CreateAtomicRMW(aop, LV.getPointer(CGF), amt, + llvm::AtomicOrdering::SequentiallyConsistent); return isPre ? Builder.CreateBinOp(op, old, amt) : old; } value = EmitLoadOfLValue(LV, E->getExprLoc()); @@ -2442,9 +2450,51 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // Most common case by far: integer increment. 
   } else if (type->isIntegerType()) {
-    // Note that signed integer inc/dec with width less than int can't
-    // overflow because of promotion rules; we're just eliding a few steps here.
-    if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) {
+    QualType promotedType;
+    bool canPerformLossyDemotionCheck = false;
+    if (type->isPromotableIntegerType()) {
+      promotedType = CGF.getContext().getPromotedIntegerType(type);
+      assert(promotedType != type && "Shouldn't promote to the same type.");
+      canPerformLossyDemotionCheck = true;
+      canPerformLossyDemotionCheck &=
+          CGF.getContext().getCanonicalType(type) !=
+          CGF.getContext().getCanonicalType(promotedType);
+      canPerformLossyDemotionCheck &=
+          PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(
+              type, promotedType);
+      assert((!canPerformLossyDemotionCheck ||
+              type->isSignedIntegerOrEnumerationType() ||
+              promotedType->isSignedIntegerOrEnumerationType() ||
+              ConvertType(type)->getScalarSizeInBits() ==
+                  ConvertType(promotedType)->getScalarSizeInBits()) &&
+             "The following check expects that if we do promotion to different "
+             "underlying canonical type, at least one of the types (either "
+             "base or promoted) will be signed, or the bitwidths will match.");
+    }
+    if (CGF.SanOpts.hasOneOf(
+            SanitizerKind::ImplicitIntegerArithmeticValueChange) &&
+        canPerformLossyDemotionCheck) {
+      // While `x += 1` (for `x` with width less than int) is modeled as
+      // promotion+arithmetics+demotion, and we can catch lossy demotion with
+      // ease; inc/dec with width less than int can't overflow because of
+      // promotion rules, so we omit promotion+demotion, which means that we can
+      // not catch lossy "demotion". Because we still want to catch these cases
+      // when the sanitizer is enabled, we perform the promotion, then perform
+      // the increment/decrement in the wider type, and finally
+      // perform the demotion. This will catch lossy demotions.
+
+      value = EmitScalarConversion(value, type, promotedType, E->getExprLoc());
+      Value *amt = llvm::ConstantInt::get(value->getType(), amount, true);
+      value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec");
+      // Do pass non-default ScalarConversionOpts so that sanitizer check is
+      // emitted.
+      value = EmitScalarConversion(value, promotedType, type, E->getExprLoc(),
+                                   ScalarConversionOpts(CGF.SanOpts));
+
+      // Note that signed integer inc/dec with width less than int can't
+      // overflow because of promotion rules; we're just eliding a few steps
+      // here.
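As the comment above explains, the hunk re-materializes the promote/operate/demote sequence for ++/-- on types narrower than int when the implicit-conversion sanitizer is active, so lossy demotions become observable. A standalone analogue of the emitted sequence (inc_with_check is a made-up name, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // Mirrors the emitted sequence: widen to the promoted type, do the
    // arithmetic there, then check whether demoting back changes the value.
    static uint8_t inc_with_check(uint8_t c) {
      int promoted = static_cast<int>(c) + 1;           // promotion + add
      uint8_t demoted = static_cast<uint8_t>(promoted); // demotion back
      if (static_cast<int>(demoted) != promoted)        // the sanitizer's test
        std::fprintf(stderr, "lossy demotion: %d -> %d\n", promoted,
                     static_cast<int>(demoted));
      return demoted;
    }

    int main() {
      uint8_t c = 255;
      c = inc_with_check(c); // reports 256 -> 0, the case plain `c++` hides
      std::printf("%d\n", static_cast<int>(c));
      return 0;
    }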
+ } else if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { value = EmitIncDecConsiderOverflowBehavior(E, value, isInc); } else if (E->canOverflow() && type->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) { @@ -2957,7 +3007,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( E->getExprLoc()), LHSTy); Value *OldVal = Builder.CreateAtomicRMW( - AtomicOp, LHSLV.getPointer(), Amt, + AtomicOp, LHSLV.getPointer(CGF), Amt, llvm::AtomicOrdering::SequentiallyConsistent); // Since operation is atomic, the result type is guaranteed to be the @@ -4011,7 +4061,7 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { case Qualifiers::OCL_Weak: RHS = Visit(E->getRHS()); LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); - RHS = CGF.EmitARCStoreWeak(LHS.getAddress(), RHS, Ignore); + RHS = CGF.EmitARCStoreWeak(LHS.getAddress(CGF), RHS, Ignore); break; case Qualifiers::OCL_None: @@ -4588,7 +4638,7 @@ LValue CodeGenFunction::EmitObjCIsaExpr(const ObjCIsaExpr *E) { if (BaseExpr->isRValue()) { Addr = Address(EmitScalarExpr(BaseExpr), getPointerAlign()); } else { - Addr = EmitLValue(BaseExpr).getAddress(); + Addr = EmitLValue(BaseExpr).getAddress(*this); } // Cast the address to Class*. diff --git a/clang/lib/CodeGen/CGNonTrivialStruct.cpp b/clang/lib/CodeGen/CGNonTrivialStruct.cpp index 332e51e57ded0..d5f378c522322 100644 --- a/clang/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/clang/lib/CodeGen/CGNonTrivialStruct.cpp @@ -707,7 +707,7 @@ struct GenMoveConstructor : GenBinaryFunc { LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); llvm::Value *SrcVal = CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); - CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV); + CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress(*CGF)), SrcLV); CGF->EmitStoreOfScalar(SrcVal, CGF->MakeAddrLValue(Addrs[DstIdx], QT), /* isInitialization */ true); } @@ -770,7 +770,7 @@ struct GenMoveAssignment : GenBinaryFunc { LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); llvm::Value *SrcVal = CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); - CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress()), SrcLV); + CGF->EmitStoreOfScalar(getNullForVariable(SrcLV.getAddress(*CGF)), SrcLV); LValue DstLV = CGF->MakeAddrLValue(Addrs[DstIdx], QT); llvm::Value *DstVal = CGF->EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); @@ -806,7 +806,8 @@ void CodeGenFunction::destroyNonTrivialCStruct(CodeGenFunction &CGF, // such structure. void CodeGenFunction::defaultInitNonTrivialCStructVar(LValue Dst) { GenDefaultInitialize Gen(getContext()); - Address DstPtr = Builder.CreateBitCast(Dst.getAddress(), CGM.Int8PtrPtrTy); + Address DstPtr = + Builder.CreateBitCast(Dst.getAddress(*this), CGM.Int8PtrPtrTy); Gen.setCGF(this); QualType QT = Dst.getType(); QT = Dst.isVolatile() ? QT.withVolatile() : QT; @@ -850,7 +851,7 @@ getSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, bool IsVolatile, // Functions to emit calls to the special functions of a non-trivial C struct. 
void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) { bool IsVolatile = Dst.isVolatile(); - Address DstPtr = Dst.getAddress(); + Address DstPtr = Dst.getAddress(*this); QualType QT = Dst.getType(); GenDefaultInitializeFuncName GenName(DstPtr.getAlignment(), getContext()); std::string FuncName = GenName.getName(QT, IsVolatile); @@ -874,7 +875,7 @@ std::string CodeGenFunction::getNonTrivialDestructorStr(QualType QT, void CodeGenFunction::callCStructDestructor(LValue Dst) { bool IsVolatile = Dst.isVolatile(); - Address DstPtr = Dst.getAddress(); + Address DstPtr = Dst.getAddress(*this); QualType QT = Dst.getType(); GenDestructorFuncName GenName("__destructor_", DstPtr.getAlignment(), getContext()); @@ -885,7 +886,7 @@ void CodeGenFunction::callCStructDestructor(LValue Dst) { void CodeGenFunction::callCStructCopyConstructor(LValue Dst, LValue Src) { bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); - Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + Address DstPtr = Dst.getAddress(*this), SrcPtr = Src.getAddress(*this); QualType QT = Dst.getType(); GenBinaryFuncName GenName("__copy_constructor_", DstPtr.getAlignment(), SrcPtr.getAlignment(), getContext()); @@ -899,7 +900,7 @@ void CodeGenFunction::callCStructCopyAssignmentOperator(LValue Dst, LValue Src ) { bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); - Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + Address DstPtr = Dst.getAddress(*this), SrcPtr = Src.getAddress(*this); QualType QT = Dst.getType(); GenBinaryFuncName GenName("__copy_assignment_", DstPtr.getAlignment(), SrcPtr.getAlignment(), getContext()); @@ -910,7 +911,7 @@ void CodeGenFunction::callCStructCopyAssignmentOperator(LValue Dst, LValue Src void CodeGenFunction::callCStructMoveConstructor(LValue Dst, LValue Src) { bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); - Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + Address DstPtr = Dst.getAddress(*this), SrcPtr = Src.getAddress(*this); QualType QT = Dst.getType(); GenBinaryFuncName GenName("__move_constructor_", DstPtr.getAlignment(), SrcPtr.getAlignment(), getContext()); @@ -924,7 +925,7 @@ void CodeGenFunction::callCStructMoveAssignmentOperator(LValue Dst, LValue Src ) { bool IsVolatile = Dst.isVolatile() || Src.isVolatile(); - Address DstPtr = Dst.getAddress(), SrcPtr = Src.getAddress(); + Address DstPtr = Dst.getAddress(*this), SrcPtr = Src.getAddress(*this); QualType QT = Dst.getType(); GenBinaryFuncName GenName("__move_assignment_", DstPtr.getAlignment(), SrcPtr.getAlignment(), getContext()); diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 984fa599a99f3..14391f3b129a1 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -511,7 +511,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, method->getMethodFamily() == OMF_retain) { if (auto lvalueExpr = findWeakLValue(E->getInstanceReceiver())) { LValue lvalue = EmitLValue(lvalueExpr); - llvm::Value *result = EmitARCLoadWeakRetained(lvalue.getAddress()); + llvm::Value *result = EmitARCLoadWeakRetained(lvalue.getAddress(*this)); return AdjustObjCObjectType(*this, E->getType(), RValue::get(result)); } } @@ -749,8 +749,8 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar, ASTContext &Context = CGF.getContext(); Address src = - CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) - .getAddress(); + CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) + 
.getAddress(CGF); // objc_copyStruct (ReturnValue, &structIvar, // sizeof (Type of Ivar), isAtomic, false); @@ -1022,8 +1022,8 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, // The 2nd argument is the address of the ivar. llvm::Value *ivarAddr = - CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), - CGF.LoadObjCSelf(), ivar, 0).getPointer(); + CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) + .getPointer(CGF); ivarAddr = CGF.Builder.CreateBitCast(ivarAddr, CGF.Int8PtrTy); args.add(RValue::get(ivarAddr), CGF.getContext().VoidPtrTy); @@ -1082,7 +1082,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, bitcastType = bitcastType->getPointerTo(); // addrspace 0 okay // Perform an atomic load. This does not impose ordering constraints. - Address ivarAddr = LV.getAddress(); + Address ivarAddr = LV.getAddress(*this); ivarAddr = Builder.CreateBitCast(ivarAddr, bitcastType); llvm::LoadInst *load = Builder.CreateLoad(ivarAddr, "load"); load->setAtomic(llvm::AtomicOrdering::Unordered); @@ -1183,14 +1183,14 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, case TEK_Scalar: { llvm::Value *value; if (propType->isReferenceType()) { - value = LV.getAddress().getPointer(); + value = LV.getAddress(*this).getPointer(); } else { // We want to load and autoreleaseReturnValue ARC __weak ivars. if (LV.getQuals().getObjCLifetime() == Qualifiers::OCL_Weak) { if (getLangOpts().ObjCAutoRefCount) { value = emitARCRetainLoadOfScalar(*this, LV, ivarType); } else { - value = EmitARCLoadWeak(LV.getAddress()); + value = EmitARCLoadWeak(LV.getAddress(*this)); } // Otherwise we want to do a simple load, suppressing the @@ -1224,9 +1224,9 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, CallArgList args; // The first argument is the address of the ivar. - llvm::Value *ivarAddr = CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), - CGF.LoadObjCSelf(), ivar, 0) - .getPointer(); + llvm::Value *ivarAddr = + CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) + .getPointer(CGF); ivarAddr = CGF.Builder.CreateBitCast(ivarAddr, CGF.Int8PtrTy); args.add(RValue::get(ivarAddr), CGF.getContext().VoidPtrTy); @@ -1235,7 +1235,7 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, DeclRefExpr argRef(CGF.getContext(), argVar, false, argVar->getType().getNonReferenceType(), VK_LValue, SourceLocation()); - llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(); + llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(CGF); argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy); args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy); @@ -1271,8 +1271,8 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, // The first argument is the address of the ivar. 
llvm::Value *ivarAddr = - CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), - CGF.LoadObjCSelf(), ivar, 0).getPointer(); + CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0) + .getPointer(CGF); ivarAddr = CGF.Builder.CreateBitCast(ivarAddr, CGF.Int8PtrTy); args.add(RValue::get(ivarAddr), CGF.getContext().VoidPtrTy); @@ -1281,7 +1281,7 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, DeclRefExpr argRef(CGF.getContext(), argVar, false, argVar->getType().getNonReferenceType(), VK_LValue, SourceLocation()); - llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(); + llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(CGF); argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy); args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy); @@ -1358,7 +1358,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, LValue ivarLValue = EmitLValueForIvar(TypeOfSelfObject(), LoadObjCSelf(), ivar, /*quals*/ 0); - Address ivarAddr = ivarLValue.getAddress(); + Address ivarAddr = ivarLValue.getAddress(*this); // Currently, all atomic accesses have to be through integer // types, so there's no point in trying to pick a prettier type. @@ -1535,7 +1535,7 @@ namespace { void Emit(CodeGenFunction &CGF, Flags flags) override { LValue lvalue = CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), addr, ivar, /*CVR*/ 0); - CGF.emitDestroy(lvalue.getAddress(), ivar->getType(), destroyer, + CGF.emitDestroy(lvalue.getAddress(CGF), ivar->getType(), destroyer, flags.isForNormalCleanup() && useEHCleanupForArray); } }; @@ -1602,7 +1602,7 @@ void CodeGenFunction::GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP, LValue LV = EmitLValueForIvar(TypeOfSelfObject(), LoadObjCSelf(), Ivar, 0); EmitAggExpr(IvarInit->getInit(), - AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed, + AggValueSlot::forLValue(LV, *this, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap)); @@ -2327,7 +2327,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrong(LValue dst, !isBlock && (dst.getAlignment().isZero() || dst.getAlignment() >= CharUnits::fromQuantity(PointerAlignInBytes))) { - return EmitARCStoreStrongCall(dst.getAddress(), newValue, ignored); + return EmitARCStoreStrongCall(dst.getAddress(*this), newValue, ignored); } // Otherwise, split it out. @@ -2726,7 +2726,7 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, result = CGF.EmitLoadOfLValue(lvalue, SourceLocation()).getScalarVal(); } else { assert(type.getObjCLifetime() == Qualifiers::OCL_Weak); - result = CGF.EmitARCLoadWeakRetained(lvalue.getAddress()); + result = CGF.EmitARCLoadWeakRetained(lvalue.getAddress(CGF)); } return TryEmitResult(result, !shouldRetain); } @@ -2750,7 +2750,7 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, SourceLocation()).getScalarVal(); // Set the source pointer to NULL. 
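The ObjC move helpers touched in this area all follow one shape: load the scalar out of the source lvalue, then store null back through the same lvalue (the getNullForVariable stores). A generic, compilable analogue of that read-then-clear step, shown only to make the pattern concrete:

    #include <cassert>

    // Read-then-clear: the shape of "EmitLoadOfLValue, then
    // EmitStoreOfScalar(getNullForVariable(...))" in the move emitters.
    template <typename T>
    T *takeAndClear(T *&slot) {
      T *value = slot; // load the current pointer
      slot = nullptr;  // null out the source so ownership moves, not copies
      return value;
    }

    int main() {
      int x = 42;
      int *p = &x;
      int *moved = takeAndClear(p);
      assert(moved == &x && p == nullptr);
      return 0;
    }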
- CGF.EmitStoreOfScalar(getNullForVariable(lv.getAddress()), lv); + CGF.EmitStoreOfScalar(getNullForVariable(lv.getAddress(CGF)), lv); return TryEmitResult(result, true); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5196c4d8503e7..f6edf899b5492 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -357,7 +357,7 @@ class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); PrivScope.addPrivate( - VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); + VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); } (void)PrivScope.Privatize(); } @@ -842,7 +842,7 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF, RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); break; case TEK_Aggregate: - InitRVal = RValue::getAggregate(LV.getAddress()); + InitRVal = RValue::getAggregate(LV.getAddress(CGF)); break; } OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); @@ -966,7 +966,7 @@ void ReductionCodeGen::emitAggregateInitialization( EmitDeclareReductionInit, EmitDeclareReductionInit ? ClausesData[N].ReductionOp : PrivateVD->getInit(), - DRD, SharedLVal.getAddress()); + DRD, SharedLVal.getAddress(CGF)); } ReductionCodeGen::ReductionCodeGen(ArrayRef Shareds, @@ -1007,13 +1007,13 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { } llvm::Value *Size; llvm::Value *SizeInChars; - auto *ElemType = - cast(SharedAddresses[N].first.getPointer()->getType()) - ->getElementType(); + auto *ElemType = cast( + SharedAddresses[N].first.getPointer(CGF)->getType()) + ->getElementType(); auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); if (AsArraySection) { - Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), - SharedAddresses[N].first.getPointer()); + Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), + SharedAddresses[N].first.getPointer(CGF)); Size = CGF.Builder.CreateNUWAdd( Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); @@ -1063,7 +1063,7 @@ void ReductionCodeGen::emitInitialization( PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); QualType SharedType = SharedAddresses[N].first.getType(); SharedLVal = CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), + CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), CGF.ConvertTypeForMem(SharedType)), SharedType, SharedAddresses[N].first.getBaseInfo(), CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); @@ -1071,7 +1071,7 @@ void ReductionCodeGen::emitInitialization( emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, - PrivateAddr, SharedLVal.getAddress(), + PrivateAddr, SharedLVal.getAddress(CGF), SharedLVal.getType()); } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && !CGF.isTrivialInitializer(PrivateVD->getInit())) { @@ -1108,15 +1108,15 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && !CGF.getContext().hasSameType(BaseTy, ElTy)) { if (const auto *PtrTy = BaseTy->getAs()) { - BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); + BaseLV = 
CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); } else { - LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); + LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); } BaseTy = BaseTy->getPointeeType(); } return CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), + CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), CGF.ConvertTypeForMem(ElTy)), BaseLV.getType(), BaseLV.getBaseInfo(), CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); @@ -1180,15 +1180,15 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), OriginalBaseLValue); llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( - BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); + BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); llvm::Value *PrivatePointer = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivateAddr.getPointer(), - SharedAddresses[N].first.getAddress().getType()); + SharedAddresses[N].first.getAddress(CGF).getType()); llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); return castToBase(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), - OriginalBaseLValue.getAddress().getType(), + OriginalBaseLValue.getAddress(CGF).getType(), OriginalBaseLValue.getAlignment(), Ptr); } BaseDecls.emplace_back( @@ -1381,12 +1381,12 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs()) - .getAddress(); + .getAddress(CGF); }); Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs()) - .getAddress(); + .getAddress(CGF); }); (void)Scope.Privatize(); if (!IsCombiner && Out->hasInit() && @@ -1496,7 +1496,7 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( UpLoc, ThreadID, CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), TaskTVar->getType()->castAs()) - .getPointer()}; + .getPointer(CGF)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); }; CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, @@ -1707,9 +1707,10 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || !CGF.getLangOpts().CXXExceptions || CGF.Builder.GetInsertBlock() == TopBlock || - !isa(LVal.getPointer()) || - cast(LVal.getPointer())->getParent() == TopBlock || - cast(LVal.getPointer())->getParent() == + !isa(LVal.getPointer(CGF)) || + cast(LVal.getPointer(CGF))->getParent() == + TopBlock || + cast(LVal.getPointer(CGF))->getParent() == CGF.Builder.GetInsertBlock()) { ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); // If value loaded in entry block, cache it and use it everywhere in @@ -3119,7 +3120,7 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, if (auto *OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) if (OMPRegionInfo->getThreadIDVariable()) - return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); + return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); llvm::Value *ThreadID = getThreadID(CGF, Loc); QualType Int32Ty = @@ -3395,7 +3396,8 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, Address 
Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), + CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), + CGF.VoidPtrTy), Elem); } // Build function that copies private values from single region to all other @@ -4540,7 +4542,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, const auto *KmpTaskTQTyRD = cast(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); - llvm::Value *PartidParam = PartIdLVal.getPointer(); + llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); @@ -4553,7 +4555,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - PrivatesLVal.getPointer(), CGF.VoidPtrTy); + PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); } else { PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } @@ -4562,7 +4564,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, TaskPrivatesMap, CGF.Builder .CreatePointerBitCastOrAddrSpaceCast( - TDBase.getAddress(), CGF.VoidPtrTy) + TDBase.getAddress(CGF), CGF.VoidPtrTy) .getPointer()}; SmallVector CallArgs(std::begin(CommonArgs), std::end(CommonArgs)); @@ -4640,7 +4642,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, if (QualType::DestructionKind DtorKind = Field->getType().isDestructedType()) { LValue FieldLValue = CGF.EmitLValueForField(Base, Field); - CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); + CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); } } CGF.FinishFunction(); @@ -4738,8 +4740,8 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, LValue RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( - RefLVal.getAddress(), RefLVal.getType()->castAs()); - CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); + RefLVal.getAddress(CGF), RefLVal.getType()->castAs()); + CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); ++Counter; } CGF.FinishFunction(); @@ -4804,7 +4806,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, } else { SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( - Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), + Address(SharedRefLValue.getPointer(CGF), + C.getDeclAlign(OriginalVD)), SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), SharedRefLValue.getTBAAInfo()); } @@ -4817,7 +4820,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // Initialize firstprivate array using element-by-element // initialization. CGF.EmitOMPAggregateAssign( - PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, + PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), + Type, [&CGF, Elem, Init, &CapturesInfo](Address DestElement, Address SrcElement) { // Clean up any temporaries needed by the initialization. 
@@ -4835,8 +4839,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, } } else { CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { - return SharedRefLValue.getAddress(); + InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { + return SharedRefLValue.getAddress(CGF); }); (void)InitScope.Privatize(); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); @@ -5236,10 +5240,10 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, dyn_cast(E->IgnoreParenImpCasts())) { LValue UpAddrLVal = CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); - llvm::Value *UpAddr = - CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); + llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( + UpAddrLVal.getPointer(CGF), /*Idx0=*/1); llvm::Value *LowIntPtr = - CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); + CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); } else { @@ -5252,7 +5256,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, LValue BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); CGF.EmitStoreOfScalar( - CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), + CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy), BaseAddrLVal); // deps[i].len = sizeof(); LValue LenLVal = CGF.EmitLValueForField( @@ -5406,21 +5410,24 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); const auto *LBVar = cast(cast(D.getLowerBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), + CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), + LBLVal.getQuals(), /*IsInitializer=*/true); LValue UBLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); const auto *UBVar = cast(cast(D.getUpperBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), + CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), + UBLVal.getQuals(), /*IsInitializer=*/true); LValue StLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); const auto *StVar = cast(cast(D.getStrideVariable())->getDecl()); - CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), + CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), + StLVal.getQuals(), /*IsInitializer=*/true); // Store reductions address. 
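For orientation: the lower-bound/upper-bound/stride fields initialized in the taskloop hunks below feed the taskloop entry point of the LLVM OpenMP runtime, and lb/ub are passed by address, which is why the code stores LBLVal.getPointer(CGF) and UBLVal.getPointer(CGF). The declaration below is a sketch with stand-in typedefs so it is self-contained; treat the exact parameter types as an assumption and consult the runtime's kmp.h for the authoritative signature:

    // Stand-in typedefs; the real definitions live in the OpenMP runtime.
    typedef struct ident ident_t;
    typedef struct kmp_task kmp_task_t;
    typedef int kmp_int32;
    typedef long long kmp_int64;
    typedef unsigned long long kmp_uint64;

    // Approximate shape of the entry point emitTaskLoopCall targets.
    extern "C" void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid,
                                    kmp_task_t *task, kmp_int32 if_val,
                                    kmp_uint64 *lb, kmp_uint64 *ub,
                                    kmp_int64 st, kmp_int32 nogroup,
                                    kmp_int32 sched, kmp_uint64 grainsize,
                                    void *task_dup);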
LValue RedLVal = CGF.EmitLValueForField( @@ -5429,7 +5436,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, if (Data.Reductions) { CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); } else { - CGF.EmitNullInitialization(RedLVal.getAddress(), + CGF.EmitNullInitialization(RedLVal.getAddress(CGF), CGF.getContext().VoidPtrTy); } enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; @@ -5438,11 +5445,11 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, ThreadID, Result.NewTask, IfVal, - LBLVal.getPointer(), - UBLVal.getPointer(), + LBLVal.getPointer(CGF), + UBLVal.getPointer(CGF), CGF.EmitLoadOfScalar(StLVal, Loc), llvm::ConstantInt::getSigned( - CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler + CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize @@ -5754,7 +5761,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), Elem); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. @@ -6234,7 +6241,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); RCG.emitSharedLValue(CGF, Cnt); llvm::Value *CastedShared = - CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); + CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); CGF.EmitStoreOfScalar(CastedShared, SharedLVal); RCG.emitAggregateType(CGF, Cnt); llvm::Value *SizeValInChars; @@ -6277,7 +6284,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), FlagsLVal); } else - CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); + CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), + FlagsLVal.getType()); } // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void // *data); @@ -6313,7 +6321,7 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), + RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy), SharedAddr, /*IsVolatile=*/false); } } @@ -6324,12 +6332,12 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, LValue SharedLVal) { // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void // *d); - llvm::Value *Args[] = { - CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, - /*isSigned=*/true), - ReductionsPtr, - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), - CGM.VoidPtrTy)}; + llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), + CGM.IntTy, + /*isSigned=*/true), + ReductionsPtr, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; return Address( CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), @@ -7514,11 +7522,11 @@ class MappableExprsHandler { } else if ((AE && isa(AE->getBase()->IgnoreParenImpCasts())) || (OASE && 
isa(OASE->getBase()->IgnoreParenImpCasts()))) { - BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); } else { // The base is the reference to the variable. // BP = &Var. - BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); if (const auto *VD = dyn_cast_or_null(I->getAssociatedDeclaration())) { if (llvm::Optional Res = @@ -7612,8 +7620,8 @@ class MappableExprsHandler { isa(Next->getAssociatedExpression())) && "Unexpected expression"); - Address LB = - CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); + Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) + .getAddress(CGF); // If this component is a pointer inside the base struct then we don't // need to create any entry for it - it will be combined with the object @@ -7660,7 +7668,7 @@ class MappableExprsHandler { if (MC.getAssociatedDeclaration()) { ComponentLB = CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) - .getAddress(); + .getAddress(CGF); Size = CGF.Builder.CreatePtrDiff( CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), CGF.EmitCastToVoidPtr(LB.getPointer())); @@ -8064,7 +8072,7 @@ class MappableExprsHandler { auto CI = DeferredInfo.find(M.first); if (CI != DeferredInfo.end()) { for (const DeferredDevicePtrEntryTy &L : CI->second) { - llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); + llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); CurBasePointers.emplace_back(BasePtr, L.VD); @@ -8186,9 +8194,10 @@ class MappableExprsHandler { LValue ThisLVal = CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); - LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(ThisLVal.getPointer()); - Pointers.push_back(ThisLValVal.getPointer()); + LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + BasePointers.push_back(ThisLVal.getPointer(CGF)); + Pointers.push_back(ThisLValVal.getPointer(CGF)); Sizes.push_back( CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, /*isSigned=*/true)); @@ -8206,17 +8215,19 @@ class MappableExprsHandler { LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); if (LC.getCaptureKind() == LCK_ByRef) { LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); - LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(VarLVal.getPointer()); - Pointers.push_back(VarLValVal.getPointer()); + LambdaPointers.try_emplace(VarLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + BasePointers.push_back(VarLVal.getPointer(CGF)); + Pointers.push_back(VarLValVal.getPointer(CGF)); Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize( VD->getType().getCanonicalType().getNonReferenceType()), CGF.Int64Ty, /*isSigned=*/true)); } else { RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); - LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(VarLVal.getPointer()); + LambdaPointers.try_emplace(VarLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + BasePointers.push_back(VarLVal.getPointer(CGF)); Pointers.push_back(VarRVal.getScalarVal()); Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); } @@ -8522,7 +8533,7 @@ class MappableExprsHandler { 
CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); // Copy the value of the original variable to the new global copy. CGF.Builder.CreateMemCpy( - CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), + CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), CurSizes.back(), /*IsVolatile=*/false); // Use new global variable as the base pointers. @@ -8932,7 +8943,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { return MapperCGF .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs()) - .getAddress(); + .getAddress(MapperCGF); }); (void)Scope.Privatize(); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index abfba39e6be17..e5ec3deac2c94 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -2318,7 +2318,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, VarTy = Rec.second.FD->getType(); } else { llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP( - VarAddr.getAddress().getPointer(), + VarAddr.getAddress(CGF).getPointer(), {Bld.getInt32(0), getNVPTXLaneID(CGF)}); VarTy = Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType(); @@ -2326,7 +2326,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy, AlignmentSource::Decl); } - Rec.second.PrivateAddr = VarAddr.getAddress(); + Rec.second.PrivateAddr = VarAddr.getAddress(CGF); if (!IsInTTDRegion && (WithSPMDCheck || getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) { @@ -2337,10 +2337,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, "Secondary glob data must be one per team."); LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD); VarAddr.setAddress( - Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(), - VarAddr.getPointer()), + Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(CGF), + VarAddr.getPointer(CGF)), VarAddr.getAlignment())); - Rec.second.PrivateAddr = VarAddr.getAddress(); + Rec.second.PrivateAddr = VarAddr.getAddress(CGF); } Address GlobalPtr = Rec.second.PrivateAddr; Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName()); @@ -2352,7 +2352,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, if (EscapedParam) { const auto *VD = cast(Rec.first); CGF.EmitStoreOfScalar(ParValue, VarAddr); - I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress()); + I->getSecond().MappedParams->setVarAddr(CGF, VD, + VarAddr.getAddress(CGF)); } if (IsTTD) ++SecIt; @@ -2386,7 +2387,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, CGM.getContext().getDeclAlign(VD), AlignmentSource::Decl); I->getSecond().MappedParams->setVarAddr(CGF, cast(VD), - Base.getAddress()); + Base.getAddress(CGF)); I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue); } I->getSecond().MappedParams->apply(CGF); @@ -3690,7 +3691,8 @@ static llvm::Value *emitListToGlobalCopyFunction( const FieldDecl *FD = VarFieldMap.lookup(VD); LValue GlobLVal = CGF.EmitLValueForField( CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs); 
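The four list-to-global and global-to-list helpers in this stretch all compute the same kind of address: one slot of a reduction variable inside a statically shaped global buffer, via an inbounds GEP off the field lvalue. A rough C++ analogue with purely illustrative names (the record layout here is assumed, not taken from the patch):

    #include <cstddef>

    // Illustrative stand-in for the per-team record in the global buffer.
    struct TeamRecord {
      int partial_sum;
      double partial_max;
    };

    // Roughly what CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs) yields:
    // the address of one slot of a field, selected by a runtime index.
    double *slotPtr(TeamRecord *buffer, std::size_t idx) {
      return &buffer[idx].partial_max;
    }

    int main() {
      TeamRecord buf[4] = {};
      *slotPtr(buf, 2) = 1.5; // write slot 2, as the copy functions do
      return 0;
    }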
GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment())); switch (CGF.getEvaluationKind(Private->getType())) { case TEK_Scalar: { @@ -3787,7 +3789,8 @@ static llvm::Value *emitListToGlobalReduceFunction( const FieldDecl *FD = VarFieldMap.lookup(VD); LValue GlobLVal = CGF.EmitLValueForField( CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs); llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy); if ((*IPriv)->getType()->isVariablyModifiedType()) { @@ -3891,7 +3894,8 @@ static llvm::Value *emitGlobalToListCopyFunction( const FieldDecl *FD = VarFieldMap.lookup(VD); LValue GlobLVal = CGF.EmitLValueForField( CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs); GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment())); switch (CGF.getEvaluationKind(Private->getType())) { case TEK_Scalar: { @@ -3987,7 +3991,8 @@ static llvm::Value *emitGlobalToListReduceFunction( const FieldDecl *FD = VarFieldMap.lookup(VD); LValue GlobLVal = CGF.EmitLValueForField( CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); - llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *BufferPtr = + Bld.CreateInBoundsGEP(GlobLVal.getPointer(CGF), Idxs); llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy); if ((*IPriv)->getType()->isVariablyModifiedType()) { @@ -4310,7 +4315,7 @@ void CGOpenMPRuntimeNVPTX::emitReduction( Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), Elem); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. 
@@ -4892,7 +4897,7 @@ void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas( if (VD->getType().getCanonicalType()->isReferenceType()) VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType().getCanonicalType()) - .getAddress(); + .getAddress(CGF); CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal); } } diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index f3a4e98edc3a5..1005855a5cadb 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -1857,15 +1857,15 @@ CodeGenFunction::EmitAsmInputLValue(const TargetInfo::ConstraintInfo &Info, Ty = llvm::IntegerType::get(getLLVMContext(), Size); Ty = llvm::PointerType::getUnqual(Ty); - Arg = Builder.CreateLoad(Builder.CreateBitCast(InputValue.getAddress(), - Ty)); + Arg = Builder.CreateLoad( + Builder.CreateBitCast(InputValue.getAddress(*this), Ty)); } else { - Arg = InputValue.getPointer(); + Arg = InputValue.getPointer(*this); ConstraintStr += '*'; } } } else { - Arg = InputValue.getPointer(); + Arg = InputValue.getPointer(*this); ConstraintStr += '*'; } @@ -2114,8 +2114,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, VT->getPrimitiveSizeInBits().getFixedSize()); } else { - ArgTypes.push_back(Dest.getAddress().getType()); - Args.push_back(Dest.getPointer()); + ArgTypes.push_back(Dest.getAddress(*this).getType()); + Args.push_back(Dest.getPointer(*this)); Constraints += "=*"; Constraints += OutputConstraint; ReadOnly = ReadNone = false; @@ -2357,7 +2357,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // ResultTypeRequiresCast.size() elements of RegResults. if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) { unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]); - Address A = Builder.CreateBitCast(Dest.getAddress(), + Address A = Builder.CreateBitCast(Dest.getAddress(*this), ResultRegTypes[i]->getPointerTo()); QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed*/ false); if (Ty.isNull()) { @@ -2410,14 +2410,14 @@ CodeGenFunction::EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K) { delete CGF.CapturedStmtInfo; // Emit call to the helper function. - EmitCallOrInvoke(F, CapStruct.getPointer()); + EmitCallOrInvoke(F, CapStruct.getPointer(*this)); return F; } Address CodeGenFunction::GenerateCapturedStmtArgument(const CapturedStmt &S) { LValue CapStruct = InitCapturedStruct(S); - return CapStruct.getAddress(); + return CapStruct.getAddress(*this); } /// Creates the outlined function for a CapturedStmt. 
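Context for the EmitAsmStmt hunks above: when an output operand cannot live in a register, codegen passes the lvalue's address (Dest.getPointer(*this)) and prefixes the constraint with "=*", marking it an indirect output. At the source level that corresponds to a memory-constrained operand; a small example using GCC/Clang extended asm, x86/x86-64 only:

    #include <cstdio>

    int report;

    void probe() {
      // "=m" forces a memory operand, so the compiler takes the address of
      // `report` (the Dest.getAddress(*this) path) instead of a register.
      asm("movl $7, %0" : "=m"(report));
    }

    int main() {
      probe();
      std::printf("%d\n", report); // prints 7
      return 0;
    }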
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index e2c055f549e02..1e6933df7084d 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -15,6 +15,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/DeclOpenMP.h" @@ -77,7 +78,7 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope { InlinedShareds.isGlobalVarCaptured(VD)), VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { - return CGF.EmitLValue(&DRE).getAddress(); + return CGF.EmitLValue(&DRE).getAddress(CGF); }); } } @@ -232,7 +233,7 @@ class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { - return CGF.EmitLValue(&DRE).getAddress(); + return CGF.EmitLValue(&DRE).getAddress(CGF); }); } } @@ -325,7 +326,7 @@ void CodeGenFunction::GenerateOpenMPCapturedVars( CapturedVars.push_back(CV); } else { assert(CurCap->capturesVariable() && "Expected capture by reference."); - CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); + CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer()); } } } @@ -336,11 +337,11 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, ASTContext &Ctx = CGF.getContext(); llvm::Value *CastedPtr = CGF.EmitScalarConversion( - AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), + AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(), Ctx.getPointerType(DstType), Loc); Address TmpAddr = CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) - .getAddress(); + .getAddress(CGF); return TmpAddr; } @@ -519,7 +520,7 @@ static llvm::Function *emitOutlinedFunctionPrologue( } else if (I->capturesVariable()) { const VarDecl *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); - Address ArgAddr = ArgLVal.getAddress(); + Address ArgAddr = ArgLVal.getAddress(CGF); if (ArgLVal.getType()->isLValueReferenceType()) { ArgAddr = CGF.EmitLoadOfReference(ArgLVal); } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { @@ -541,12 +542,12 @@ static llvm::Function *emitOutlinedFunctionPrologue( ? castValueFromUintptr( CGF, I->getLocation(), FD->getType(), Args[Cnt]->getName(), ArgLVal) - : ArgLVal.getAddress()}}); + : ArgLVal.getAddress(CGF)}}); } else { // If 'this' is captured, load it into CXXThisValue. 
assert(I->capturesThis()); CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); - LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); + LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}}); } ++Cnt; ++I; @@ -830,8 +831,8 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, EmitAggregateAssign(Dest, OriginalLVal, Type); } else { EmitOMPAggregateAssign( - Emission.getAllocatedAddress(), OriginalLVal.getAddress(), - Type, + Emission.getAllocatedAddress(), + OriginalLVal.getAddress(*this), Type, [this, VDInit, Init](Address DestElement, Address SrcElement) { // Clean up any temporaries needed by the @@ -849,7 +850,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, return Emission.getAllocatedAddress(); }); } else { - Address OriginalAddr = OriginalLVal.getAddress(); + Address OriginalAddr = OriginalLVal.getAddress(*this); IsRegistered = PrivateScope.addPrivate( OrigVD, [this, VDInit, OriginalAddr, VD]() { // Emit private VarDecl with copy init. @@ -926,7 +927,7 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { "Copyin threadprivates should have been captured!"); DeclRefExpr DRE(getContext(), const_cast(VD), true, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); - MasterAddr = EmitLValue(&DRE).getAddress(); + MasterAddr = EmitLValue(&DRE).getAddress(*this); LocalDeclMap.erase(VD); } else { MasterAddr = @@ -935,7 +936,7 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { getContext().getDeclAlign(VD)); } // Get the address of the threadprivate variable. - Address PrivateAddr = EmitLValue(*IRef).getAddress(); + Address PrivateAddr = EmitLValue(*IRef).getAddress(*this); if (CopiedVars.size() == 1) { // At first check if current thread is a master thread. If it is, no // need to copy data. @@ -1003,7 +1004,7 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( /*RefersToEnclosingVariableOrCapture=*/ CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); - return EmitLValue(&DRE).getAddress(); + return EmitLValue(&DRE).getAddress(*this); }); // Check if the variable is also a firstprivate: in this case IInit is // not generated. Initialization of this variable will happen in codegen @@ -1160,8 +1161,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit( if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { // Store the address of the original variable associated with the LHS // implicit variable. - PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() { - return RedCG.getSharedLValue(Count).getAddress(); + PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() { + return RedCG.getSharedLValue(Count).getAddress(*this); }); PrivateScope.addPrivate( RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); }); @@ -1169,8 +1170,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit( isa(IRef)) { // Store the address of the original variable associated with the LHS // implicit variable. 
- PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() { - return RedCG.getSharedLValue(Count).getAddress(); + PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() { + return RedCG.getSharedLValue(Count).getAddress(*this); }); PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() { return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), @@ -1180,7 +1181,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( } else { QualType Type = PrivateVD->getType(); bool IsArray = getContext().getAsArrayType(Type) != nullptr; - Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); + Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this); // Store the address of the original variable associated with the LHS // implicit variable. if (IsArray) { @@ -1529,7 +1530,7 @@ void CodeGenFunction::EmitOMPLinearClauseFinal( DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); - Address OrigAddr = EmitLValue(&DRE).getAddress(); + Address OrigAddr = EmitLValue(&DRE).getAddress(*this); CodeGenFunction::OMPPrivateScope VarScope(*this); VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); (void)VarScope.Privatize(); @@ -1599,7 +1600,7 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters( DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), E->getType(), VK_LValue, E->getExprLoc()); - return EmitLValue(&DRE).getAddress(); + return EmitLValue(&DRE).getAddress(*this); }); } else { (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() { @@ -1762,12 +1763,13 @@ void CodeGenFunction::EmitOMPSimdFinal( } Address OrigAddr = Address::invalid(); if (CED) { - OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); + OrigAddr = + EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this); } else { DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), /*RefersToEnclosingVariableOrCapture=*/false, (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); - OrigAddr = EmitLValue(&DRE).getAddress(); + OrigAddr = EmitLValue(&DRE).getAddress(*this); } OMPPrivateScope VarScope(*this); VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); @@ -2277,14 +2279,16 @@ static void emitDistributeParallelForDistributeInnerBoundParams( const auto &Dir = cast<OMPLoopDirective>(S); LValue LB = CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); - llvm::Value *LBCast = CGF.Builder.CreateIntCast( - CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + llvm::Value *LBCast = + CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)), + CGF.SizeTy, /*isSigned=*/false); CapturedVars.push_back(LBCast); LValue UB = CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); - llvm::Value *UBCast = CGF.Builder.CreateIntCast( - CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + llvm::Value *UBCast = + CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)), + CGF.SizeTy, /*isSigned=*/false); CapturedVars.push_back(UBCast); } @@ -2521,8 +2525,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( // one chunk is distributed to each thread. Note that the size of // the chunks is unspecified in this case. CGOpenMPRuntime::StaticRTInput StaticInit( - IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress(), + IVSize, IVSigned, Ordered, IL.getAddress(CGF), + LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), StaticChunkedOne ?
Chunk : nullptr); CGF.CGM.getOpenMPRuntime().emitForStaticInit( CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, @@ -2571,9 +2575,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. - const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), - ST.getAddress(), IL.getAddress(), - Chunk, EUB); + const OMPLoopArguments LoopArguments( + LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), + IL.getAddress(*this), Chunk, EUB); EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, LoopArguments, CGDispatchBounds); } @@ -2777,8 +2781,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { OpenMPScheduleTy ScheduleKind; ScheduleKind.Schedule = OMPC_SCHEDULE_static; CGOpenMPRuntime::StaticRTInput StaticInit( - /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), - LB.getAddress(), UB.getAddress(), ST.getAddress()); + /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF), + LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF)); CGF.CGM.getOpenMPRuntime().emitForStaticInit( CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); // UB = min(UB, GlobalUB); @@ -3112,7 +3116,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); Scope.addPrivate(Pair.first, [&CGF, &DRE]() { - return CGF.EmitLValue(&DRE).getAddress(); + return CGF.EmitLValue(&DRE).getAddress(CGF); }); } for (const auto &Pair : PrivatePtrs) { @@ -3209,7 +3213,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, Data.NumberOfParts); OMPLexicalScope Scope(*this, S, llvm::None, - !isOpenMPParallelDirective(S.getDirectiveKind())); + !isOpenMPParallelDirective(S.getDirectiveKind()) && + !isOpenMPSimdDirective(S.getDirectiveKind())); TaskGen(*this, OutlinedFn, Data); } @@ -3570,8 +3575,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, if (isOpenMPSimdDirective(S.getDirectiveKind())) EmitOMPSimdInit(S, /*IsMonotonic=*/true); CGOpenMPRuntime::StaticRTInput StaticInit( - IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), - LB.getAddress(), UB.getAddress(), ST.getAddress(), + IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this), + LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), StaticChunked ? Chunk : nullptr); RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); @@ -3637,8 +3642,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. const OMPLoopArguments LoopArguments = { - LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), - Chunk}; + LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), + IL.getAddress(*this), Chunk}; EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, CodeGenLoop); } @@ -3838,11 +3843,11 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, // expression is simple and atomic is allowed for the given type for the // target platform.
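// StaticRTInput and OMPLoopArguments, used heavily above, are parameter
// objects: the loop-control addresses (lower bound, upper bound, stride and
// the "is last iteration" flag) travel together rather than as ever-growing
// argument lists. The idea in miniature (types and names are illustrative
// only, not the clang interfaces):
#include <cstdio>

struct Address { int *Ptr; };

struct StaticLoopInput {
  int IVSize;         // width of the induction variable in bits
  bool IVSigned;      // signedness of the induction variable
  bool Ordered;       // whether the construct has an ordered clause
  Address IsLast;     // set for the thread that runs the last chunk
  Address LB, UB, ST; // lower bound, upper bound, stride
};

// Call sites stay readable even as the parameter set grows.
static void emitForStaticInit(const StaticLoopInput &Input) {
  std::printf("ivsize=%d signed=%d ordered=%d\n", Input.IVSize,
              (int)Input.IVSigned, (int)Input.Ordered);
}

int main() {
  int IL = 0, LB = 0, UB = 99, ST = 1;
  emitForStaticInit({32, true, false, {&IL}, {&LB}, {&UB}, {&ST}});
}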
if (BO == BO_Comma || !Update.isScalar() || - !Update.getScalarVal()->getType()->isIntegerTy() || - !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && - (Update.getScalarVal()->getType() != - X.getAddress().getElementType())) || - !X.getAddress().getElementType()->isIntegerTy() || + !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() || + (!isa<llvm::ConstantInt>(Update.getScalarVal()) && + (Update.getScalarVal()->getType() != + X.getAddress(CGF).getElementType())) || + !X.getAddress(CGF).getElementType()->isIntegerTy() || !Context.getTargetInfo().hasBuiltinAtomic( Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) return std::make_pair(false, RValue::get(nullptr)); @@ -3914,11 +3919,11 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, llvm::Value *UpdateVal = Update.getScalarVal(); if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { UpdateVal = CGF.Builder.CreateIntCast( - IC, X.getAddress().getElementType(), + IC, X.getAddress(CGF).getElementType(), X.getType()->hasSignedIntegerRepresentation()); } llvm::Value *Res = - CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); + CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO); return std::make_pair(true, RValue::get(Res)); } @@ -5101,10 +5106,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.incrementProfileCounter(&S); } - if (isOpenMPSimdDirective(S.getDirectiveKind())) { - CGF.EmitOMPSimdInit(S); - (void)CGF.EmitOMPLinearClauseInit(S); - } + (void)CGF.EmitOMPLinearClauseInit(S); OMPPrivateScope LoopScope(CGF); // Emit helper vars inits. @@ -5140,13 +5142,24 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.EmitIgnoredExpr(S.getCalcLastIteration()); } - CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), - S.getInc(), - [&S](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, JumpDest()); - CGF.EmitStopPoint(&S); - }, - [](CodeGenFunction &) {}); + { + OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); + emitCommonSimdLoop( + CGF, S, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + if (isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPSimdInit(S); + }, + [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + }); + } // Emit: if (PreCond) - end.
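// The taskloop rewrite above routes inner-loop emission through
// emitCommonSimdLoop, which takes two callbacks: one for simd-specific setup
// and one that emits the loop itself. A stripped-down model of that control
// flow (hypothetical names, not the real clang signatures):
#include <functional>
#include <iostream>

struct CodeGen {
  bool IsSimd; // whether the directive has simd semantics
};

static void emitCommonSimdLoop(CodeGen &CG,
                               const std::function<void(CodeGen &)> &SimdInit,
                               const std::function<void(CodeGen &)> &BodyGen) {
  SimdInit(CG); // e.g. attach vectorization hints to the loop
  BodyGen(CG);  // emit the loop body
}

int main() {
  CodeGen CG{true};
  emitCommonSimdLoop(
      CG,
      [](CodeGen &C) {
        if (C.IsSimd)
          std::cout << "simd init\n"; // runs only for the simd variants
      },
      [](CodeGen &C) { (void)C; std::cout << "inner loop\n"; });
}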
if (ContBlock) { CGF.EmitBranch(ContBlock); @@ -5200,6 +5213,7 @@ void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { void CodeGenFunction::EmitOMPTaskLoopSimdDirective( const OMPTaskLoopSimdDirective &S) { + OMPLexicalScope Scope(*this, S); EmitOMPTaskLoopBasedDirective(S); } @@ -5219,7 +5233,7 @@ void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( Action.Enter(CGF); EmitOMPTaskLoopBasedDirective(S); }; - OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); } @@ -5292,7 +5306,7 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { LValue GlobLVal = CGF.EmitLValue(E); LoopGlobals.addPrivate( - VD, [&GlobLVal]() { return GlobLVal.getAddress(); }); + VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); }); } if (isa<OMPCapturedExprDecl>(VD)) { // Emit only those that were not explicitly referenced in clauses. diff --git a/clang/lib/CodeGen/CGValue.h b/clang/lib/CodeGen/CGValue.h index 71f95abe488a9..9fd07bdb187d4 100644 --- a/clang/lib/CodeGen/CGValue.h +++ b/clang/lib/CodeGen/CGValue.h @@ -29,6 +29,7 @@ namespace llvm { namespace clang { namespace CodeGen { class AggValueSlot; + class CodeGenFunction; struct CGBitFieldInfo; /// RValue - This trivial value class is used to represent the result of an @@ -319,11 +320,13 @@ class LValue { void setBaseInfo(LValueBaseInfo Info) { BaseInfo = Info; } // simple lvalue - llvm::Value *getPointer() const { + llvm::Value *getPointer(CodeGenFunction &CGF) const { assert(isSimple()); return V; } - Address getAddress() const { return Address(getPointer(), getAlignment()); } + Address getAddress(CodeGenFunction &CGF) const { + return Address(getPointer(CGF), getAlignment()); + } void setAddress(Address address) { assert(isSimple()); V = address.getPointer(); @@ -427,8 +430,8 @@ class LValue { return R; } - RValue asAggregateRValue() const { - return RValue::getAggregate(getAddress(), isVolatileQualified()); + RValue asAggregateRValue(CodeGenFunction &CGF) const { + return RValue::getAggregate(getAddress(CGF), isVolatileQualified()); } }; @@ -536,14 +539,12 @@ class AggValueSlot { return AV; } - static AggValueSlot forLValue(const LValue &LV, - IsDestructed_t isDestructed, - NeedsGCBarriers_t needsGC, - IsAliased_t isAliased, - Overlap_t mayOverlap, - IsZeroed_t isZeroed = IsNotZeroed, - IsSanitizerChecked_t isChecked = IsNotSanitizerChecked) { - return forAddr(LV.getAddress(), LV.getQuals(), isDestructed, needsGC, + static AggValueSlot + forLValue(const LValue &LV, CodeGenFunction &CGF, IsDestructed_t isDestructed, + NeedsGCBarriers_t needsGC, IsAliased_t isAliased, + Overlap_t mayOverlap, IsZeroed_t isZeroed = IsNotZeroed, + IsSanitizerChecked_t isChecked = IsNotSanitizerChecked) { + return forAddr(LV.getAddress(CGF), LV.getQuals(), isDestructed, needsGC, isAliased, mayOverlap, isZeroed, isChecked); } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index e2707fe2bec8d..475d015ecf268 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -34,6 +34,8 @@ #include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/FPEnv.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" @@ -88,6 +90,7 @@
CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) FMF.setAllowReassoc(); } Builder.setFastMathFlags(FMF); + SetFPModel(); } CodeGenFunction::~CodeGenFunction() { @@ -103,6 +106,51 @@ CodeGenFunction::~CodeGenFunction() { CGM.getOpenMPRuntime().functionFinished(*this); } +// Map the LangOption for rounding mode into +// the corresponding enum in the IR. +static llvm::fp::RoundingMode ToConstrainedRoundingMD( + LangOptions::FPRoundingModeKind Kind) { + + switch (Kind) { + case LangOptions::FPR_ToNearest: return llvm::fp::rmToNearest; + case LangOptions::FPR_Downward: return llvm::fp::rmDownward; + case LangOptions::FPR_Upward: return llvm::fp::rmUpward; + case LangOptions::FPR_TowardZero: return llvm::fp::rmTowardZero; + case LangOptions::FPR_Dynamic: return llvm::fp::rmDynamic; + } + llvm_unreachable("Unsupported FP RoundingMode"); +} + +// Map the LangOption for exception behavior into +// the corresponding enum in the IR. +static llvm::fp::ExceptionBehavior ToConstrainedExceptMD( + LangOptions::FPExceptionModeKind Kind) { + + switch (Kind) { + case LangOptions::FPE_Ignore: return llvm::fp::ebIgnore; + case LangOptions::FPE_MayTrap: return llvm::fp::ebMayTrap; + case LangOptions::FPE_Strict: return llvm::fp::ebStrict; + } + llvm_unreachable("Unsupported FP Exception Behavior"); +} + +void CodeGenFunction::SetFPModel() { + auto fpRoundingMode = ToConstrainedRoundingMD( + getLangOpts().getFPRoundingMode()); + auto fpExceptionBehavior = ToConstrainedExceptMD( + getLangOpts().getFPExceptionMode()); + + if (fpExceptionBehavior == llvm::fp::ebIgnore && + fpRoundingMode == llvm::fp::rmToNearest) + // Constrained intrinsics are not used. + ; + else { + Builder.setIsFPConstrained(true); + Builder.setDefaultConstrainedRounding(fpRoundingMode); + Builder.setDefaultConstrainedExcept(fpExceptionBehavior); + } +} + CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo) { @@ -841,6 +889,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (FD->isMain()) Fn->addFnAttr(llvm::Attribute::NoRecurse); + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) + if (FD->usesFPIntrin()) + Fn->addFnAttr(llvm::Attribute::StrictFP); + // If a custom alignment is used, force realigning to this alignment on // any main function which certainly will need it. if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) @@ -999,7 +1051,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, LValue ThisFieldLValue = EmitLValueForLambdaField(LambdaThisCaptureField); if (!LambdaThisCaptureField->getType()->isPointerType()) { // If the enclosing object was captured by value, just use its address. - CXXThisValue = ThisFieldLValue.getAddress().getPointer(); + CXXThisValue = ThisFieldLValue.getAddress(*this).getPointer(); } else { // Load the lvalue pointed to by the field, since '*this' was captured // by reference.
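// SetFPModel above keeps the IRBuilder in its normal mode only for the
// default combination (round-to-nearest, FP exceptions ignored) and switches
// it to constrained floating point otherwise. The decision reduces to a small
// predicate; the enums below are simplified stand-ins for the llvm::fp ones:
#include <iostream>

enum class RoundingMode { ToNearest, Downward, Upward, TowardZero, Dynamic };
enum class ExceptionBehavior { Ignore, MayTrap, Strict };

// Constrained intrinsics are needed unless both settings are at their default.
static bool needsConstrainedFP(RoundingMode RM, ExceptionBehavior EB) {
  return !(RM == RoundingMode::ToNearest && EB == ExceptionBehavior::Ignore);
}

int main() {
  std::cout << needsConstrainedFP(RoundingMode::ToNearest,
                                  ExceptionBehavior::Ignore)  // 0
            << needsConstrainedFP(RoundingMode::Dynamic,
                                  ExceptionBehavior::Strict)  // 1
            << '\n';
}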
@@ -2036,11 +2088,11 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { Address CodeGenFunction::EmitVAListRef(const Expr* E) { if (getContext().getBuiltinVaListType()->isArrayType()) return EmitPointerWithAlignment(E); - return EmitLValue(E).getAddress(); + return EmitLValue(E).getAddress(*this); } Address CodeGenFunction::EmitMSVAListRef(const Expr *E) { - return EmitLValue(E).getAddress(); + return EmitLValue(E).getAddress(*this); } void CodeGenFunction::EmitDeclRefExprDbgValue(const DeclRefExpr *E, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 0c6fcb89f6ba0..c1718cade2f88 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4178,6 +4178,9 @@ class CodeGenFunction : public CodeGenTypeCache { /// point operation, expressed as the maximum relative error in ulp. void SetFPAccuracy(llvm::Value *Val, float Accuracy); + /// SetFPModel - Control floating point behavior via fp-model settings. + void SetFPModel(); + private: llvm::MDNode *getRangeForLoadFromType(QualType Ty); void EmitReturnOfRValue(RValue RV, QualType Ty); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 7432233f8afc9..306969aea522f 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -74,6 +74,7 @@ static const char AnnotationSection[] = "llvm.metadata"; static CGCXXABI *createCXXABI(CodeGenModule &CGM) { switch (CGM.getTarget().getCXXABI().getKind()) { + case TargetCXXABI::Fuchsia: case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericARM: case TargetCXXABI::iOS: diff --git a/clang/lib/CodeGen/ConstantEmitter.h b/clang/lib/CodeGen/ConstantEmitter.h index 59a19730f4ebc..121acbac4fa91 100644 --- a/clang/lib/CodeGen/ConstantEmitter.h +++ b/clang/lib/CodeGen/ConstantEmitter.h @@ -23,7 +23,7 @@ namespace CodeGen { class ConstantEmitter { public: CodeGenModule &CGM; - CodeGenFunction *CGF; + CodeGenFunction *const CGF; private: bool Abstract = false; diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 0a7a4fe33ac2d..bdecff39c88fd 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -1114,8 +1114,8 @@ struct CounterCoverageMappingBuilder // Make a region for the body of the switch. If the body starts with // a case, that case will reuse this region; otherwise, this covers // the unreachable code at the beginning of the switch body. 
- size_t Index = - pushRegion(Counter::getZero(), getStart(CS->body_front())); + size_t Index = pushRegion(Counter::getZero(), getStart(CS)); + getRegion().setGap(true); for (const auto *Child : CS->children()) Visit(Child); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 8f9b16470b642..515eb3f1f168d 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -487,6 +487,19 @@ class iOS64CXXABI : public ARMCXXABI { bool shouldRTTIBeUnique() const override { return false; } }; +class FuchsiaCXXABI final : public ItaniumCXXABI { +public: + explicit FuchsiaCXXABI(CodeGen::CodeGenModule &CGM) + : ItaniumCXXABI(CGM) {} + +private: + bool HasThisReturn(GlobalDecl GD) const override { + return isa<CXXConstructorDecl>(GD.getDecl()) || + (isa<CXXDestructorDecl>(GD.getDecl()) && + GD.getDtorType() != Dtor_Deleting); + } +}; + class WebAssemblyCXXABI final : public ItaniumCXXABI { public: explicit WebAssemblyCXXABI(CodeGen::CodeGenModule &CGM) @@ -516,6 +529,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { case TargetCXXABI::iOS64: return new iOS64CXXABI(CGM); + case TargetCXXABI::Fuchsia: + return new FuchsiaCXXABI(CGM); + // Note that AArch64 uses the generic ItaniumCXXABI class since it doesn't // include the other 32-bit ARM oddities: constructor/destructor return values // and array cookies. diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 8196df614cee8..800d02d5d0394 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1343,6 +1343,13 @@ void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { // The TU defining a dtor is only guaranteed to emit a base destructor. All // other destructor variants are delegating thunks. CGM.EmitGlobal(GlobalDecl(D, Dtor_Base)); + + // If the class is dllexported, emit the complete (vbase) destructor wherever + // the base dtor is emitted. + // FIXME: To match MSVC, this should only be done when the class is exported + // with -fdllexport-inlines enabled. + if (D->getParent()->getNumVBases() > 0 && D->hasAttr<DLLExportAttr>()) + CGM.EmitGlobal(GlobalDecl(D, Dtor_Complete)); } CharUnits diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index ec848a312ae01..97bea0150e7f7 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -1225,7 +1225,7 @@ void X86_32TargetCodeGenInfo::addReturnRegisterOutputs( ResultTruncRegTypes.push_back(CoerceTy); // Coerce the integer by bitcasting the return slot pointer. - ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(), + ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(CGF), CoerceTy->getPointerTo())); ResultRegDests.push_back(ReturnSlot); diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 92e04108a7e29..06707fefc9d08 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -13,11 +13,28 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/Triple.h" using namespace clang::driver; using namespace clang; -static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { +static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS, + const llvm::Triple &TargetOrHost) { + // If we don't target Linux, no need to check the distro. This saves a few + // OS calls.
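// DetectDistro, reworked above and below, gains two early exits before any
// /etc/*-release file is read: the target must be Linux for the answer to
// matter, and a non-Linux host probing the real file system cannot have a
// distro at all (the cross-compilation case). The gating logic, modeled with
// toy types instead of llvm::Triple and llvm::vfs:
#include <iostream>

struct Triple { bool IsLinux; };

static bool shouldProbeDistro(const Triple &Target, const Triple &Host,
                              bool UsingRealFS) {
  if (!Target.IsLinux)
    return false; // the result could never be used
  if (!Host.IsLinux && UsingRealFS)
    return false; // a real FS on a non-Linux host has no distro files
  return true;    // otherwise go read /etc/lsb-release and friends
}

int main() {
  std::cout << shouldProbeDistro({true}, {true}, true)   // 1
            << shouldProbeDistro({false}, {true}, true)  // 0
            << shouldProbeDistro({true}, {false}, false) // 1: unit-test VFS
            << '\n';
}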
+ if (!TargetOrHost.isOSLinux()) + return Distro::UnknownDistro; + + // If the host is not running Linux, and we're backed by a real file system, + // no need to check the distro. This is the case where someone is + // cross-compiling from BSD or Windows to Linux, and it would be meaningless + // to try to figure out the "distro" of the non-Linux host. + IntrusiveRefCntPtr<llvm::vfs::FileSystem> RealFS = + llvm::vfs::getRealFileSystem(); + llvm::Triple HostTriple(llvm::sys::getProcessTriple()); + if (!HostTriple.isOSLinux() && &VFS == RealFS.get()) + return Distro::UnknownDistro; + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> File = VFS.getBufferForFile("/etc/lsb-release"); if (File) { @@ -149,4 +166,5 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { return Distro::UnknownDistro; } -Distro::Distro(llvm::vfs::FileSystem &VFS) : DistroVal(DetectDistro(VFS)) {} +Distro::Distro(llvm::vfs::FileSystem &VFS, const llvm::Triple &TargetOrHost) + : DistroVal(DetectDistro(VFS, TargetOrHost)) {} diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index 50450b7deb567..6fbff61f76565 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -20,6 +20,62 @@ using namespace clang::driver::tools; using namespace llvm::opt; +void aix::Assembler::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + ArgStringList CmdArgs; + + const bool IsArch32Bit = getToolChain().getTriple().isArch32Bit(); + const bool IsArch64Bit = getToolChain().getTriple().isArch64Bit(); + // Only support 32 and 64 bit. + if (!IsArch32Bit && !IsArch64Bit) + llvm_unreachable("Unsupported bit width value."); + + // Specify the mode in which the as(1) command operates. + if (IsArch32Bit) { + CmdArgs.push_back("-a32"); + } else { + // Must be 64-bit, otherwise asserted already. + CmdArgs.push_back("-a64"); + } + + // Accept an undefined symbol as an extern so that an error message is not + // displayed. Otherwise, undefined symbols are flagged with error messages. + // FIXME: This should be removed when the assembly generation from the + // compiler is able to write externs properly. + CmdArgs.push_back("-u"); + + // Accept any mixture of instructions. + // On Power for AIX and Linux, this behaviour matches that of GCC for both the + // user-provided assembler source case and the compiler-produced assembler + // source case. Yet XL with user-provided assembler source would not add this. + CmdArgs.push_back("-many"); + + Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); + + // Specify assembler output file. + assert((Output.isFilename() || Output.isNothing()) && "Invalid output."); + if (Output.isFilename()) { + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + } + + // Specify assembler input file. + // The system assembler on AIX takes exactly one input file. The driver is + // expected to invoke as(1) separately for each assembler source input file.
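// The as(1) job built above and finished just below amounts to a fixed
// argument recipe: a width flag, two compatibility flags, the output file,
// then exactly one input. A rough standalone equivalent, with the driver
// plumbing left out:
#include <iostream>
#include <string>
#include <vector>

static std::vector<std::string> buildAIXAsArgs(bool Is64Bit,
                                               const std::string &Output,
                                               const std::string &Input) {
  std::vector<std::string> Args;
  Args.push_back(Is64Bit ? "-a64" : "-a32"); // assembler mode
  Args.push_back("-u");    // treat undefined symbols as externs
  Args.push_back("-many"); // accept any mixture of instructions
  Args.push_back("-o");
  Args.push_back(Output);
  Args.push_back(Input);   // the AIX assembler takes a single input file
  return Args;
}

int main() {
  for (const std::string &A : buildAIXAsArgs(true, "foo.o", "foo.s"))
    std::cout << A << ' ';
  std::cout << '\n'; // -a64 -u -many -o foo.o foo.s
}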
+ if (Inputs.size() != 1) + llvm_unreachable("Invalid number of input files."); + const InputInfo &II = Inputs[0]; + assert((II.isFilename() || II.isNothing()) && "Invalid input."); + if (II.isFilename()) + CmdArgs.push_back(II.getFilename()); + + const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); + C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); +} + void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, @@ -42,7 +98,7 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); - } + } // Set linking mode (i.e., 32/64-bit) and the address of // text and data sections based on arch bit width. @@ -92,11 +148,12 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); } -/// AIX - AIX tool chain which can call ld(1) directly. -// TODO: Enable direct call to as(1). +/// AIX - AIX tool chain which can call as(1) and ld(1) directly. AIX::AIX(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : ToolChain(D, Triple, Args) { getFilePaths().push_back(getDriver().SysRoot + "/usr/lib"); } +auto AIX::buildAssembler() const -> Tool * { return new aix::Assembler(*this); } + auto AIX::buildLinker() const -> Tool * { return new aix::Linker(*this); } diff --git a/clang/lib/Driver/ToolChains/AIX.h b/clang/lib/Driver/ToolChains/AIX.h index 58c06c3e4413e..69b948bc0ea82 100644 --- a/clang/lib/Driver/ToolChains/AIX.h +++ b/clang/lib/Driver/ToolChains/AIX.h @@ -16,10 +16,21 @@ namespace clang { namespace driver { namespace tools { -/// aix -- Directly call system default linker. -// TODO: Enable direct call to system default assembler. +/// aix -- Directly call system default assembler and linker. namespace aix { +class LLVM_LIBRARY_VISIBILITY Assembler : public Tool { +public: + Assembler(const ToolChain &TC) : Tool("aix::Assembler", "assembler", TC) {} + + bool hasIntegratedCPP() const override { return false; } + + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + class LLVM_LIBRARY_VISIBILITY Linker : public Tool { public: Linker(const ToolChain &TC) : Tool("aix::Linker", "linker", TC) {} @@ -53,6 +64,7 @@ class LLVM_LIBRARY_VISIBILITY AIX : public ToolChain { bool isPICDefaultForced() const override { return true; } protected: + Tool *buildAssembler() const override; Tool *buildLinker() const override; }; diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index cca47722c2044..68a57310ad402 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -592,39 +592,11 @@ void arm::getARMTargetFeatures(const ToolChain &TC, Features.push_back("+strict-align"); } - // Do not allow r9 reservation with -frwpi. - if (Args.hasArg(options::OPT_ffixed_r9) && Args.hasArg(options::OPT_frwpi)) { - Arg *A = Args.getLastArg(options::OPT_ffixed_r9); - Arg *B = Args.getLastArg(options::OPT_frwpi); - D.Diag(diag::err_opt_not_valid_with_opt) - << A->getAsString(Args) << B->getAsString(Args); - } - - // The compiler can still use a FP in certain circumstances, - // even when frame pointer elimination is enabled.
Thus we should - not allow to reserve a target's FP register. - const llvm::opt::OptSpecifier RestrictFPOpt = - (Triple.isOSDarwin() || (!Triple.isOSWindows() && Triple.isThumb())) - ? options::OPT_ffixed_r7 - : options::OPT_ffixed_r11; - if (Args.hasArg(RestrictFPOpt)) { - const std::string OptStr = - Args.getLastArg(RestrictFPOpt)->getAsString(Args); - const unsigned int SubStrIndex = strlen("ffixed-r"); - D.Diag(diag::err_reserved_frame_pointer) - << OptStr << OptStr.substr(SubStrIndex); - } -// Reservation of general purpose registers. -#define HANDLE_FFIXED_R(n) \ - if (Args.hasArg(options::OPT_ffixed_r##n)) \ - Features.push_back("+reserve-r" #n) - HANDLE_FFIXED_R(6); - HANDLE_FFIXED_R(7); - HANDLE_FFIXED_R(8); - HANDLE_FFIXED_R(9); - HANDLE_FFIXED_R(10); - HANDLE_FFIXED_R(11); + // llvm does not support reserving registers in general. There is support + // for reserving r9 on ARM though (defined as a platform-specific register + // in ARM EABI). + if (Args.hasArg(options::OPT_ffixed_r9)) + Features.push_back("+reserve-r9"); // The kext linker doesn't know how to deal with movw/movt. if (KernelOrKext || Args.hasArg(options::OPT_mno_movt)) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index ddd1174a75834..917b40f103937 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -635,16 +635,33 @@ static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs, /// Add a CC1 and CC1AS option to specify the debug file path prefix map. static void addDebugPrefixMapArg(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs) { - for (const Arg *A : Args.filtered(options::OPT_fdebug_prefix_map_EQ)) { + for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ, + options::OPT_fdebug_prefix_map_EQ)) { StringRef Map = A->getValue(); if (Map.find('=') == StringRef::npos) - D.Diag(diag::err_drv_invalid_argument_to_fdebug_prefix_map) << Map; + D.Diag(diag::err_drv_invalid_argument_to_option) + << Map << A->getOption().getName(); else CmdArgs.push_back(Args.MakeArgString("-fdebug-prefix-map=" + Map)); A->claim(); } } +/// Add a CC1 and CC1AS option to specify the macro file path prefix map. +static void addMacroPrefixMapArg(const Driver &D, const ArgList &Args, + ArgStringList &CmdArgs) { + for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ, + options::OPT_fmacro_prefix_map_EQ)) { + StringRef Map = A->getValue(); + if (Map.find('=') == StringRef::npos) + D.Diag(diag::err_drv_invalid_argument_to_option) + << Map << A->getOption().getName(); + else + CmdArgs.push_back(Args.MakeArgString("-fmacro-prefix-map=" + Map)); + A->claim(); + } +} + /// Vectorize at all optimization levels greater than 1 except for -Oz. /// For -Oz the loop vectorizer is disabled, while the slp vectorizer is /// enabled. @@ -1355,6 +1372,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, // For IAMCU add special include arguments. getToolChain().AddIAMCUIncludeArgs(Args, CmdArgs); } + + addMacroPrefixMapArg(D, Args, CmdArgs); } // FIXME: Move to target hook. @@ -2295,9 +2314,18 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, bool AssociativeMath = false; bool ReciprocalMath = false; bool SignedZeros = true; - bool TrappingMath = true; + bool TrappingMath = false; // Implemented via -ffp-exception-behavior + bool TrappingMathPresent = false; // Is trapping-math in args, and not + // overridden by ffp-exception-behavior?
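// The -ffp-model= handling that follows rewrites the umbrella option into
// granular settings before cc1 ever sees it: "fast" behaves like -ffast-math,
// "precise" pins contraction to fast, and "strict" enables rounding and
// trapping math. A condensed view of that expansion (the struct and names are
// illustrative, not the driver's real data model):
#include <iostream>
#include <string>

struct FPSettings {
  bool FastMath = false;     // -ffast-math semantics
  bool RoundingMath = false; // -frounding-math
  bool TrappingMath = false; // implies -ffp-exception-behavior=strict
  std::string Contract;      // -ffp-contract= value, empty if unset
};

static bool expandFPModel(const std::string &Val, FPSettings &Out) {
  if (Val == "fast")
    Out = {true, false, false, "fast"};
  else if (Val == "precise")
    Out = {false, false, false, "fast"};
  else if (Val == "strict")
    Out = {false, true, true, ""};
  else
    return false; // unsupported value, diagnosed by the driver
  return true;
}

int main() {
  FPSettings S;
  if (expandFPModel("strict", S))
    std::cout << "rounding=" << S.RoundingMath
              << " trapping=" << S.TrappingMath << '\n'; // both print 1
}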
+ bool RoundingFPMath = false; + bool RoundingMathPresent = false; // Is rounding-math in args? + // -ffp-model values: strict, fast, precise + StringRef FPModel = ""; + // -ffp-exception-behavior options: strict, maytrap, ignore + StringRef FPExceptionBehavior = ""; StringRef DenormalFPMath = ""; StringRef FPContract = ""; + bool StrictFPModel = false; if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) { CmdArgs.push_back("-mlimit-float-precision"); @@ -2305,7 +2333,73 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, } for (const Arg *A : Args) { - switch (A->getOption().getID()) { + auto optID = A->getOption().getID(); + bool PreciseFPModel = false; + switch (optID) { + default: + break; + case options::OPT_frounding_math: + case options::OPT_ftrapping_math: + case options::OPT_ffp_exception_behavior_EQ: + D.Diag(clang::diag::warn_drv_experimental_fp_control_incomplete_opt) + << A->getOption().getName(); + break; + case options::OPT_ffp_model_EQ: { + D.Diag(clang::diag::warn_drv_experimental_fp_control_incomplete_opt) + << A->getOption().getName(); + // If -ffp-model= is seen, reset to fno-fast-math + HonorINFs = true; + HonorNaNs = true; + // Turning *off* -ffast-math restores the toolchain default. + MathErrno = TC.IsMathErrnoDefault(); + AssociativeMath = false; + ReciprocalMath = false; + SignedZeros = true; + // -fno_fast_math restores default denormal and fpcontract handling + DenormalFPMath = ""; + FPContract = ""; + StringRef Val = A->getValue(); + if (OFastEnabled && !Val.equals("fast")) { + // Only -ffp-model=fast is compatible with OFast, ignore. + D.Diag(clang::diag::warn_drv_overriding_flag_option) + << Args.MakeArgString("-ffp-model=" + Val) + << "-Ofast"; + break; + } + StrictFPModel = false; + PreciseFPModel = true; + // ffp-model= is a Driver option, it is entirely rewritten into more + // granular options before being passed into cc1. + // Use the gcc option in the switch below. + if (!FPModel.empty() && !FPModel.equals(Val)) { + D.Diag(clang::diag::warn_drv_overriding_flag_option) + << Args.MakeArgString("-ffp-model=" + FPModel) + << Args.MakeArgString("-ffp-model=" + Val); + FPContract = ""; + } + if (Val.equals("fast")) { + optID = options::OPT_ffast_math; + FPModel = Val; + FPContract = "fast"; + } else if (Val.equals("precise")) { + optID = options::OPT_ffp_contract; + FPModel = Val; + FPContract = "fast"; + PreciseFPModel = true; + } else if (Val.equals("strict")) { + StrictFPModel = true; + optID = options::OPT_frounding_math; + FPExceptionBehavior = "strict"; + FPModel = Val; + TrappingMath = true; + } else + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << Val; + break; + } + } + + switch (optID) { // If this isn't an FP option skip the claim below default: continue; @@ -2322,19 +2416,82 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, case options::OPT_fno_reciprocal_math: ReciprocalMath = false; break; case options::OPT_fsigned_zeros: SignedZeros = true; break; case options::OPT_fno_signed_zeros: SignedZeros = false; break; - case options::OPT_ftrapping_math: TrappingMath = true; break; - case options::OPT_fno_trapping_math: TrappingMath = false; break; + case options::OPT_ftrapping_math: + if (!TrappingMathPresent && !FPExceptionBehavior.empty() && + !FPExceptionBehavior.equals("strict")) + // Warn that previous value of option is overridden. 
+ D.Diag(clang::diag::warn_drv_overriding_flag_option) + << Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior) + << "-ftrapping-math"; + TrappingMath = true; + TrappingMathPresent = true; + FPExceptionBehavior = "strict"; + break; + case options::OPT_fno_trapping_math: + if (!TrappingMathPresent && !FPExceptionBehavior.empty() && + !FPExceptionBehavior.equals("ignore")) + // Warn that previous value of option is overridden. + D.Diag(clang::diag::warn_drv_overriding_flag_option) + << Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior) + << "-fno-trapping-math"; + TrappingMath = false; + TrappingMathPresent = true; + FPExceptionBehavior = "ignore"; + break; + + case options::OPT_frounding_math: + RoundingFPMath = true; + RoundingMathPresent = true; + break; + + case options::OPT_fno_rounding_math: + RoundingFPMath = false; + RoundingMathPresent = false; + break; case options::OPT_fdenormal_fp_math_EQ: DenormalFPMath = A->getValue(); break; - // Validate and pass through -fp-contract option. + // Validate and pass through -ffp-contract option. case options::OPT_ffp_contract: { StringRef Val = A->getValue(); - if (Val == "fast" || Val == "on" || Val == "off") + if (PreciseFPModel) { + // -ffp-model=precise enables ffp-contract=fast as a side effect + // the FPContract value has already been set to a string literal + // and the Val string isn't a pertinent value. + ; + } else if (Val.equals("fast") || Val.equals("on") || Val.equals("off")) FPContract = Val; else + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << Val; + break; + } + + // Validate and pass through -ffp-model option. + case options::OPT_ffp_model_EQ: + // This should only occur in the error case + // since the optID has been replaced by a more granular + // floating point option. + break; + + // Validate and pass through -ffp-exception-behavior option. + case options::OPT_ffp_exception_behavior_EQ: { + StringRef Val = A->getValue(); + if (!TrappingMathPresent && !FPExceptionBehavior.empty() && + !FPExceptionBehavior.equals(Val)) + // Warn that previous value of option is overridden. + D.Diag(clang::diag::warn_drv_overriding_flag_option) + << Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior) + << Args.MakeArgString("-ffp-exception-behavior=" + Val); + TrappingMath = TrappingMathPresent = false; + if (Val.equals("ignore") || Val.equals("maytrap")) + FPExceptionBehavior = Val; + else if (Val.equals("strict")) { + FPExceptionBehavior = Val; + TrappingMath = TrappingMathPresent = true; + } else D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Val; break; @@ -2354,12 +2511,14 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, ReciprocalMath = true; SignedZeros = false; TrappingMath = false; + FPExceptionBehavior = ""; break; case options::OPT_fno_unsafe_math_optimizations: AssociativeMath = false; ReciprocalMath = false; SignedZeros = true; TrappingMath = true; + FPExceptionBehavior = "strict"; // -fno_unsafe_math_optimizations restores default denormal handling DenormalFPMath = ""; break; @@ -2377,6 +2536,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, ReciprocalMath = true; SignedZeros = false; TrappingMath = false; + RoundingFPMath = false; // If fast-math is set then set the fp-contract mode to fast. 
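// Throughout the hunks above, -f(no-)trapping-math and
// -ffp-exception-behavior= feed one piece of state, and a later flag that
// changes it draws an "overriding" warning instead of an error: the last flag
// on the command line wins. The scheme in miniature (flag strings are just
// examples):
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::string Behavior; // empty means "not set yet"
  const std::vector<std::pair<std::string, std::string>> Flags = {
      {"-ftrapping-math", "strict"},
      {"-ffp-exception-behavior=ignore", "ignore"}, // overrides the above
  };
  for (const auto &[Flag, Value] : Flags) {
    if (!Behavior.empty() && Behavior != Value)
      std::cout << "warning: '" << Flag << "' overrides '" << Behavior
                << "'\n";
    Behavior = Value; // last one wins
  }
  std::cout << "final: " << Behavior << '\n'; // final: ignore
}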
FPContract = "fast"; break; @@ -2390,12 +2550,31 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, AssociativeMath = false; ReciprocalMath = false; SignedZeros = true; - TrappingMath = true; + TrappingMath = false; + RoundingFPMath = false; // -fno_fast_math restores default denormal and fpcontract handling DenormalFPMath = ""; FPContract = ""; break; } + if (StrictFPModel) { + // If -ffp-model=strict has been specified on command line but + // subsequent options conflict then emit warning diagnostic. + if (HonorINFs && HonorNaNs && + !AssociativeMath && !ReciprocalMath && + SignedZeros && TrappingMath && RoundingFPMath && + DenormalFPMath.empty() && FPContract.empty()) + // OK: Current Arg doesn't conflict with -ffp-model=strict + ; + else { + StrictFPModel = false; + FPModel = ""; + D.Diag(clang::diag::warn_drv_overriding_flag_option) + << "-ffp-model=strict" << + ((A->getNumValues() == 0) ? A->getSpelling() + : Args.MakeArgString(A->getSpelling() + A->getValue())); + } + } // If we handled this option claim it A->claim(); @@ -2423,7 +2602,11 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, if (ReciprocalMath) CmdArgs.push_back("-freciprocal-math"); - if (!TrappingMath) + if (TrappingMath) { + // FP Exception Behavior is also set to strict + assert(FPExceptionBehavior.equals("strict")); + CmdArgs.push_back("-ftrapping-math"); + } else if (TrappingMathPresent) CmdArgs.push_back("-fno-trapping-math"); if (!DenormalFPMath.empty()) @@ -2433,14 +2616,37 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, if (!FPContract.empty()) CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + FPContract)); + if (!RoundingFPMath) + CmdArgs.push_back(Args.MakeArgString("-fno-rounding-math")); + + if (RoundingFPMath && RoundingMathPresent) + CmdArgs.push_back(Args.MakeArgString("-frounding-math")); + + if (!FPExceptionBehavior.empty()) + CmdArgs.push_back(Args.MakeArgString("-ffp-exception-behavior=" + + FPExceptionBehavior)); + ParseMRecip(D, Args, CmdArgs); // -ffast-math enables the __FAST_MATH__ preprocessor macro, but check for the // individual features enabled by -ffast-math instead of the option itself as // that's consistent with gcc's behaviour. if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath && - ReciprocalMath && !SignedZeros && !TrappingMath) + ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) { CmdArgs.push_back("-ffast-math"); + if (FPModel.equals("fast")) { + if (FPContract.equals("fast")) + // All set, do nothing. + ; + else if (FPContract.empty()) + // Enable -ffp-contract=fast + CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast")); + else + D.Diag(clang::diag::warn_drv_overriding_flag_option) + << "-ffp-model=fast" + << Args.MakeArgString("-ffp-contract=" + FPContract); + } + } // Handle __FINITE_MATH_ONLY__ similarly. 
if (!HonorINFs && !HonorNaNs) @@ -4565,9 +4771,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasArg(options::OPT_fexperimental_new_constant_interpreter)) CmdArgs.push_back("-fexperimental-new-constant-interpreter"); - if (Args.hasArg(options::OPT_fforce_experimental_new_constant_interpreter)) - CmdArgs.push_back("-fforce-experimental-new-constant-interpreter"); - if (Arg *A = Args.getLastArg(options::OPT_fbracket_depth_EQ)) { CmdArgs.push_back("-fbracket-depth"); CmdArgs.push_back(A->getValue()); @@ -5736,7 +5939,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, TC.getTriple().isOSBinFormatCOFF()) && !TC.getTriple().isPS4() && !TC.getTriple().isOSNetBSD() && - !Distro(D.getVFS()).IsGentoo() && + !Distro(D.getVFS(), TC.getTriple()).IsGentoo() && !TC.getTriple().isAndroid() && TC.useIntegratedAs())) CmdArgs.push_back("-faddrsig"); diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 8c704a3078adc..02871d2ce411f 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -115,7 +115,8 @@ CudaInstallationDetector::CudaInstallationDetector( for (const char *Ver : Versions) Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver); - if (Distro(D.getVFS()).IsDebian() || Distro(D.getVFS()).IsUbuntu()) + Distro Dist(D.getVFS(), llvm::Triple(llvm::sys::getProcessTriple())); + if (Dist.IsDebian() || Dist.IsUbuntu()) // Special case for Debian to have nvidia-cuda-toolkit work // out of the box. More info on http://bugs.debian.org/882505 Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda"); diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index 3e5e8a00652d3..85e94fe018e6a 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -12,6 +12,7 @@ #include "Arch/Sparc.h" #include "CommonArgs.h" #include "clang/Driver/Compilation.h" +#include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" #include "llvm/Option/ArgList.h" @@ -30,6 +31,7 @@ void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; + const auto &D = getToolChain().getDriver(); // When building 32-bit code on FreeBSD/amd64, we have to explicitly // instruct as in the base system to assemble 32-bit code. 
@@ -103,6 +105,19 @@ void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, } } + for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ, + options::OPT_fdebug_prefix_map_EQ)) { + StringRef Map = A->getValue(); + if (Map.find('=') == StringRef::npos) + D.Diag(diag::err_drv_invalid_argument_to_option) + << Map << A->getOption().getName(); + else { + CmdArgs.push_back(Args.MakeArgString("--debug-prefix-map")); + CmdArgs.push_back(Args.MakeArgString(Map)); + } + A->claim(); + } + Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 9bea0b15c8739..4c5d4003f1442 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -351,10 +351,9 @@ SanitizerMask Fuchsia::getDefaultSanitizers() const { case llvm::Triple::x86_64: Res |= SanitizerKind::SafeStack; break; - case llvm::Triple::riscv64: - break; default: - llvm_unreachable("invalid architecture"); + // TODO: Enable SafeStack on RISC-V once tested. + break; } return Res; } diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index abcf4377fe718..fb13474e0791c 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -929,6 +929,19 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, } } + for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ, + options::OPT_fdebug_prefix_map_EQ)) { + StringRef Map = A->getValue(); + if (Map.find('=') == StringRef::npos) + D.Diag(diag::err_drv_invalid_argument_to_option) + << Map << A->getOption().getName(); + else { + CmdArgs.push_back(Args.MakeArgString("--debug-prefix-map")); + CmdArgs.push_back(Args.MakeArgString(Map)); + } + A->claim(); + } + Args.AddAllArgs(CmdArgs, options::OPT_I); Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 1053a1a609789..d1f570e75a9bd 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -343,9 +343,8 @@ void HIPToolChain::addClangTargetOptions( else WaveFrontSizeBC = "oclc_wavefrontsize64_off.amdgcn.bc"; - BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc", "ocml.amdgcn.bc", - "ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc", - FlushDenormalControlBC, + BCLibs.append({"hip.amdgcn.bc", "ocml.amdgcn.bc", "ockl.amdgcn.bc", + "oclc_finite_only_off.amdgcn.bc", FlushDenormalControlBC, "oclc_correctly_rounded_sqrt_on.amdgcn.bc", "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC, WaveFrontSizeBC}); diff --git a/clang/lib/Driver/ToolChains/InterfaceStubs.cpp b/clang/lib/Driver/ToolChains/InterfaceStubs.cpp index f441f4787097b..8f947e79bd1f1 100644 --- a/clang/lib/Driver/ToolChains/InterfaceStubs.cpp +++ b/clang/lib/Driver/ToolChains/InterfaceStubs.cpp @@ -46,6 +46,8 @@ void Merger::ConstructJob(Compilation &C, const JobAction &JA, // Here we append the input files. If the input files are object files, then // we look for .ifs files present in the same location as the object files. 
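// Both assembler paths above forward -ffile-prefix-map and -fdebug-prefix-map
// values to GNU as as --debug-prefix-map, after checking that each value
// contains an '='. The validate-and-split step is essentially this
// (simplified, without the driver's diagnostics engine):
#include <cstddef>
#include <iostream>
#include <string>

// Splits "OLD=NEW" into its halves; returns false when '=' is missing.
static bool splitPrefixMap(const std::string &Map, std::string &Old,
                           std::string &New) {
  const std::size_t Eq = Map.find('=');
  if (Eq == std::string::npos)
    return false; // the driver reports err_drv_invalid_argument_to_option
  Old = Map.substr(0, Eq);
  New = Map.substr(Eq + 1);
  return true;
}

int main() {
  std::string Old, New;
  if (splitPrefixMap("/build/src=/usr/src", Old, New))
    std::cout << Old << " -> " << New << '\n';
  std::cout << splitPrefixMap("nodelimiter", Old, New) << '\n'; // 0
}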
for (const auto &Input : Inputs) { + if (!Input.isFilename()) + continue; SmallString<128> InputFilename(Input.getFilename()); if (Input.getType() == types::TY_Object) llvm::sys::path::replace_extension(InputFilename, ".ifs"); diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index a744b7d632397..2d902cdf7ae65 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -245,7 +245,7 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) .str()); } - Distro Distro(D.getVFS()); + Distro Distro(D.getVFS(), Triple); if (Distro.IsAlpineLinux() || Triple.isAndroid()) { ExtraOpts.push_back("-z"); @@ -516,7 +516,7 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { const llvm::Triple::ArchType Arch = getArch(); const llvm::Triple &Triple = getTriple(); - const Distro Distro(getDriver().getVFS()); + const Distro Distro(getDriver().getVFS(), Triple); if (Triple.isAndroid()) return Triple.isArch64Bit() ? "/system/bin/linker64" : "/system/bin/linker"; diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index 55b82592c09fd..1bb7c35d0c522 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -92,10 +92,10 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(std::make_unique<Command>(JA, *this, Linker, CmdArgs, Inputs)); - // When optimizing, if wasm-opt is in the PATH, run wasm-opt. + // When optimizing, if wasm-opt is available, run it. if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - if (llvm::ErrorOr<std::string> WasmOptPath = - llvm::sys::findProgramByName("wasm-opt")) { + auto WasmOptPath = getToolChain().GetProgramPath("wasm-opt"); + if (WasmOptPath != "wasm-opt") { StringRef OOpt = "s"; if (A->getOption().matches(options::OPT_O4) || A->getOption().matches(options::OPT_Ofast)) @@ -106,7 +106,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, OOpt = A->getValue(); if (OOpt != "0") { - const char *WasmOpt = Args.MakeArgString(*WasmOptPath); + const char *WasmOpt = Args.MakeArgString(WasmOptPath); ArgStringList CmdArgs; CmdArgs.push_back(Output.getFilename()); CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt)); diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 4e42bab561823..f12bca48c630e 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -537,6 +537,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("SpacesBeforeTrailingComments", Style.SpacesBeforeTrailingComments); IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); + IO.mapOptional("SpacesInConditionalStatement", + Style.SpacesInConditionalStatement); IO.mapOptional("SpacesInContainerLiterals", Style.SpacesInContainerLiterals); IO.mapOptional("SpacesInCStyleCastParentheses", @@ -817,6 +819,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.SpaceBeforeCpp11BracedList = false; LLVMStyle.SpaceBeforeSquareBrackets = false; LLVMStyle.SpacesInAngles = false; + LLVMStyle.SpacesInConditionalStatement = false; LLVMStyle.PenaltyBreakAssignment = prec::Assignment; LLVMStyle.PenaltyBreakComment = 300; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 9fe7fdc9ce937..d5d394e61926a 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1829,7 +1829,8 @@ class AnnotatingParser { // Use heuristics to recognize unary
operators. if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, tok::question, tok::colon, tok::kw_return, - tok::kw_case, tok::at, tok::l_brace, tok::kw_throw)) + tok::kw_case, tok::at, tok::l_brace, tok::kw_throw, + tok::kw_co_return, tok::kw_co_yield)) return TT_UnaryOperator; // There can't be two consecutive binary operators. @@ -2591,6 +2592,13 @@ bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const { Right.ParameterCount > 0); } +/// Returns \c true if the token is followed by a boolean condition, \c false +/// otherwise. +static bool isKeywordWithCondition(const FormatToken &Tok) { + return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, + tok::kw_constexpr); +} + bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, const FormatToken &Right) { @@ -2609,6 +2617,15 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, (Left.is(tok::l_brace) && Left.BlockKind != BK_Block && Right.is(tok::r_brace) && Right.BlockKind != BK_Block)) return Style.SpaceInEmptyParentheses; + if (Style.SpacesInConditionalStatement) { + if (Left.is(tok::l_paren) && Left.Previous && + isKeywordWithCondition(*Left.Previous)) + return true; + if (Right.is(tok::r_paren) && Right.MatchingParen && + Right.MatchingParen->Previous && + isKeywordWithCondition(*Right.MatchingParen->Previous)) + return true; + } if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) return (Right.is(TT_CastRParen) || (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen))) @@ -3043,7 +3060,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // The identifier might actually be a macro name such as ALWAYS_INLINE. If // this turns out to be too lenient, add analysis of the identifier itself. 
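// SpacesInConditionalStatement, wired up above, pads only the parentheses of
// control-flow conditions (if/for/while/switch); ordinary call parentheses
// are untouched. Roughly, a formatter run with the option enabled turns the
// first form below into the second:
#include <iostream>

int main() {
  int x = 1;
  // SpacesInConditionalStatement: false (the LLVM default)
  if (x > 0)
    std::cout << "default\n";
  // SpacesInConditionalStatement: true, note the padded condition
  if ( x > 0 )
    std::cout << "padded\n";
}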
return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); - if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment)) + if (Right.is(tok::coloncolon) && + !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) return (Left.is(TT_TemplateOpener) && Style.Standard < FormatStyle::LS_Cpp11) || !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 181bbc6440fae..b4db2fa3c4022 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2876,8 +2876,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, getLastArgIntValue(Args, OPT_fconstexpr_steps, 1048576, Diags); Opts.EnableNewConstInterp = Args.hasArg(OPT_fexperimental_new_constant_interpreter); - Opts.ForceNewConstInterp = - Args.hasArg(OPT_fforce_experimental_new_constant_interpreter); Opts.BracketDepth = getLastArgIntValue(Args, OPT_fbracket_depth, 256, Diags); Opts.DelayedTemplateParsing = Args.hasArg(OPT_fdelayed_template_parsing); Opts.NumLargeByValueCopy = @@ -3173,6 +3171,34 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } + LangOptions::FPRoundingModeKind FPRM = LangOptions::FPR_ToNearest; + if (Args.hasArg(OPT_frounding_math)) { + FPRM = LangOptions::FPR_Dynamic; + } + Opts.setFPRoundingMode(FPRM); + + if (Args.hasArg(OPT_ftrapping_math)) { + Opts.setFPExceptionMode(LangOptions::FPE_Strict); + } + + if (Args.hasArg(OPT_fno_trapping_math)) { + Opts.setFPExceptionMode(LangOptions::FPE_Ignore); + } + + LangOptions::FPExceptionModeKind FPEB = LangOptions::FPE_Ignore; + if (Arg *A = Args.getLastArg(OPT_ffp_exception_behavior_EQ)) { + StringRef Val = A->getValue(); + if (Val.equals("ignore")) + FPEB = LangOptions::FPE_Ignore; + else if (Val.equals("maytrap")) + FPEB = LangOptions::FPE_MayTrap; + else if (Val.equals("strict")) + FPEB = LangOptions::FPE_Strict; + else + Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; + } + Opts.setFPExceptionMode(FPEB); + Opts.RetainCommentsFromSystemHeaders = Args.hasArg(OPT_fretain_comments_from_system_headers); @@ -3341,6 +3367,9 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, for (const auto *A : Args.filtered(OPT_error_on_deserialized_pch_decl)) Opts.DeserializedPCHDeclsToErrorOn.insert(A->getValue()); + for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) + Opts.MacroPrefixMap.insert(StringRef(A).split('=')); + if (const Arg *A = Args.getLastArg(OPT_preamble_bytes_EQ)) { StringRef Value(A->getValue()); size_t Comma = Value.find(','); diff --git a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp b/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp index ab62b633cda38..18c4814bbd5cc 100644 --- a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp +++ b/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp @@ -26,7 +26,8 @@ using namespace llvm::opt; std::unique_ptr<CompilerInvocation> clang::createInvocationFromCommandLine( ArrayRef<const char *> ArgList, IntrusiveRefCntPtr<DiagnosticsEngine> Diags, - IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, bool ShouldRecoverOnErorrs) { + IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, bool ShouldRecoverOnErorrs, + std::vector<std::string> *CC1Args) { if (!Diags.get()) { // No diagnostics engine was provided, so create our own diagnostics object // with the default options.
@@ -89,6 +90,8 @@ std::unique_ptr<CompilerInvocation> clang::createInvocationFromCommandLine( } const ArgStringList &CCArgs = Cmd.getArguments(); + if (CC1Args) + *CC1Args = {CCArgs.begin(), CCArgs.end()}; auto CI = std::make_unique<CompilerInvocation>(); if (!CompilerInvocation::CreateFromArgs(*CI, CCArgs, *Diags) && !ShouldRecoverOnErorrs) return nullptr; diff --git a/clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp b/clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp index 025e6eb1508fc..3b0a5668af940 100644 --- a/clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp +++ b/clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp @@ -195,6 +195,10 @@ class InterfaceStubFunctionsConsumer : public ASTConsumer { case Decl::Kind::TemplateTemplateParm: case Decl::Kind::ClassTemplatePartialSpecialization: case Decl::Kind::IndirectField: + case Decl::Kind::ConstructorUsingShadow: + case Decl::Kind::CXXDeductionGuide: + case Decl::Kind::NamespaceAlias: + case Decl::Kind::UnresolvedUsingTypename: return true; case Decl::Kind::Var: { // Bail on any VarDecl that either has no named symbol. diff --git a/clang/lib/Frontend/PrecompiledPreamble.cpp b/clang/lib/Frontend/PrecompiledPreamble.cpp index ced32c670288e..0e5a8e504dc59 100644 --- a/clang/lib/Frontend/PrecompiledPreamble.cpp +++ b/clang/lib/Frontend/PrecompiledPreamble.cpp @@ -535,21 +535,15 @@ PrecompiledPreamble::TempPCHFile::CreateNewPreamblePCHFile() { // FIXME: This is a hack so that we can override the preamble file during // crash-recovery testing, which is the only case where the preamble files // are not necessarily cleaned up. - const char *TmpFile = ::getenv("CINDEXTEST_PREAMBLE_FILE"); - if (TmpFile) - return TempPCHFile::createFromCustomPath(TmpFile); - return TempPCHFile::createInSystemTempDir("preamble", "pch"); -} + if (const char *TmpFile = ::getenv("CINDEXTEST_PREAMBLE_FILE")) + return TempPCHFile(TmpFile); -llvm::ErrorOr<PrecompiledPreamble::TempPCHFile> -PrecompiledPreamble::TempPCHFile::createInSystemTempDir(const Twine &Prefix, - StringRef Suffix) { llvm::SmallString<64> File; // Using a version of createTemporaryFile with a file descriptor guarantees // that we would never get a race condition in a multi-threaded setting // (i.e., multiple threads getting the same temporary path). int FD; - auto EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, FD, File); + auto EC = llvm::sys::fs::createTemporaryFile("preamble", "pch", FD, File); if (EC) return EC; // We only needed to make sure the file exists, close the file right away. @@ -557,11 +551,6 @@ PrecompiledPreamble::TempPCHFile::createInSystemTempDir(const Twine &Prefix, return TempPCHFile(std::move(File).str()); } -llvm::ErrorOr<PrecompiledPreamble::TempPCHFile> -PrecompiledPreamble::TempPCHFile::createFromCustomPath(const Twine &Path) { - return TempPCHFile(Path.str()); -} - PrecompiledPreamble::TempPCHFile::TempPCHFile(std::string FilePath) : FilePath(std::move(FilePath)) { TemporaryFiles::getInstance().addFile(*this->FilePath); diff --git a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp index f063ed711c44c..029bfe1cd6008 100644 --- a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -763,12 +763,13 @@ bool Minimizer::lexEndif(const char *&First, const char *const End) { if (top() == pp_else) popToken(); - // Strip out "#elif" if they're empty. - while (top() == pp_elif) - popToken(); - - // If "#if" is empty, strip it and skip the "#endif".
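// The TempPCHFile cleanup above collapses two factory helpers into one code
// path: honor the CINDEXTEST_PREAMBLE_FILE override when present, otherwise
// create a unique preamble PCH in the system temp directory. The control
// flow, sketched with the C library standing in for llvm::sys::fs (which,
// unlike tmpnam, also returns an open descriptor to rule out races):
#include <cstdio>
#include <cstdlib>
#include <string>

static std::string preamblePCHPath() {
  // Test hook: an environment variable wins over automatic placement.
  if (const char *Override = std::getenv("CINDEXTEST_PREAMBLE_FILE"))
    return Override;
  // Fallback: a freshly generated unique name, as createTemporaryFile would.
  char Buf[L_tmpnam];
  return std::tmpnam(Buf) ? std::string(Buf) + ".pch" : std::string();
}

int main() { std::printf("%s\n", preamblePCHPath().c_str()); }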
-  if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) {
+  // If "#ifdef" is empty, strip it and skip the "#endif".
+  //
+  // FIXME: Once/if Clang starts disallowing __has_include in macro expansions,
+  // we can skip empty `#if` and `#elif` blocks as well after scanning for a
+  // literal __has_include in the condition. Even without that rule we could
+  // drop the tokens if we scan for identifiers in the condition and find none.
+  if (top() == pp_ifdef || top() == pp_ifndef) {
     popToken();
     skipLine(First, End);
     return false;
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 43236c2ef8caa..6f470cae4929d 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -29,6 +29,7 @@
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorLexer.h"
+#include "clang/Lex/PreprocessorOptions.h"
 #include "clang/Lex/Token.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -1450,6 +1451,17 @@ static bool isTargetEnvironment(const TargetInfo &TI,
   return TI.getTriple().getEnvironment() == Env.getEnvironment();
 }
 
+static void remapMacroPath(
+    SmallString<256> &Path,
+    const std::map<std::string, std::string, std::greater<std::string>>
+        &MacroPrefixMap) {
+  for (const auto &Entry : MacroPrefixMap)
+    if (Path.startswith(Entry.first)) {
+      Path = (Twine(Entry.second) + Path.substr(Entry.first.size())).str();
+      break;
+    }
+}
+
 /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
 /// as a builtin macro, handle it and return the next token as 'Tok'.
 void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
@@ -1516,7 +1528,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     }
 
     // Escape this filename.  Turn '\' -> '\\' '"' -> '\"'
-    SmallString<128> FN;
+    SmallString<256> FN;
     if (PLoc.isValid()) {
       // __FILE_NAME__ is a Clang-specific extension that expands to the
       // the last part of __FILE__.
@@ -1532,6 +1544,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
         FN += PLoc.getFilename();
     }
     Lexer::Stringify(FN);
+    remapMacroPath(FN, PPOpts->MacroPrefixMap);
     OS << '"' << FN << '"';
   }
   Tok.setKind(tok::string_literal);
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index c6ffbfc968d07..fe409327bfb4b 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -3367,7 +3367,7 @@ void Parser::ParseCXXMemberSpecification(SourceLocation RecordLoc,
 
     // We've finished parsing everything, including default argument
     // initializers.
-    Actions.ActOnFinishCXXNonNestedClass(TagDecl);
+    Actions.ActOnFinishCXXNonNestedClass();
   }
 
   if (TagDecl)
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 77eed54376098..7dfe71fb9ebcc 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -1352,6 +1352,13 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer(
     // Parse attribute-specifier[opt].
     MaybeParseCXX11Attributes(Attr, &DeclEndLoc);
 
+    // Parse OpenCL addr space attribute.
+    if (Tok.isOneOf(tok::kw___private, tok::kw___global, tok::kw___local,
+                    tok::kw___constant, tok::kw___generic)) {
+      ParseOpenCLQualifiers(DS.getAttributes());
+      ConsumeToken();
+    }
+
     SourceLocation FunLocalRangeEnd = DeclEndLoc;
 
     // Parse trailing-return-type[opt].
@@ -1380,10 +1387,12 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer(
                                            NoexceptExpr.isUsable() ?
NoexceptExpr.get() : nullptr, /*ExceptionSpecTokens*/ nullptr, /*DeclsInPrototype=*/None, LParenLoc, FunLocalRangeEnd, D, - TrailingReturnType), + TrailingReturnType, &DS), std::move(Attr), DeclEndLoc); } else if (Tok.isOneOf(tok::kw_mutable, tok::arrow, tok::kw___attribute, - tok::kw_constexpr, tok::kw_consteval) || + tok::kw_constexpr, tok::kw_consteval, + tok::kw___private, tok::kw___global, tok::kw___local, + tok::kw___constant, tok::kw___generic) || (Tok.is(tok::l_square) && NextToken().is(tok::l_square))) { // It's common to forget that one needs '()' before 'mutable', an attribute // specifier, or the result type. Deal with this. @@ -1392,6 +1401,11 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( case tok::kw_mutable: TokKind = 0; break; case tok::arrow: TokKind = 1; break; case tok::kw___attribute: + case tok::kw___private: + case tok::kw___global: + case tok::kw___local: + case tok::kw___constant: + case tok::kw___generic: case tok::l_square: TokKind = 2; break; case tok::kw_constexpr: TokKind = 3; break; case tok::kw_consteval: TokKind = 4; break; diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index cb2710ab8c67c..c111682024772 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -1192,6 +1192,59 @@ bool Parser::ParseParenExprOrCondition(StmtResult *InitStmt, return false; } +namespace { + +enum MisleadingStatementKind { MSK_if, MSK_else, MSK_for, MSK_while }; + +struct MisleadingIndentationChecker { + Parser &P; + SourceLocation StmtLoc; + SourceLocation PrevLoc; + unsigned NumDirectives; + MisleadingStatementKind Kind; + bool NeedsChecking; + bool ShouldSkip; + MisleadingIndentationChecker(Parser &P, MisleadingStatementKind K, + SourceLocation SL) + : P(P), StmtLoc(SL), PrevLoc(P.getCurToken().getLocation()), + NumDirectives(P.getPreprocessor().getNumDirectives()), Kind(K), + NeedsChecking(true), ShouldSkip(P.getCurToken().is(tok::l_brace)) { + if (!P.MisleadingIndentationElseLoc.isInvalid()) { + StmtLoc = P.MisleadingIndentationElseLoc; + P.MisleadingIndentationElseLoc = SourceLocation(); + } + if (Kind == MSK_else && !ShouldSkip) + P.MisleadingIndentationElseLoc = SL; + } + void Check() { + NeedsChecking = false; + Token Tok = P.getCurToken(); + if (ShouldSkip || NumDirectives != P.getPreprocessor().getNumDirectives() || + Tok.isOneOf(tok::semi, tok::r_brace) || Tok.isAnnotation() || + Tok.getLocation().isMacroID() || PrevLoc.isMacroID() || + StmtLoc.isMacroID() || + (Kind == MSK_else && P.MisleadingIndentationElseLoc.isInvalid())) { + P.MisleadingIndentationElseLoc = SourceLocation(); + return; + } + + SourceManager &SM = P.getPreprocessor().getSourceManager(); + unsigned PrevColNum = SM.getSpellingColumnNumber(PrevLoc); + unsigned CurColNum = SM.getSpellingColumnNumber(Tok.getLocation()); + unsigned StmtColNum = SM.getSpellingColumnNumber(StmtLoc); + + if (PrevColNum != 0 && CurColNum != 0 && StmtColNum != 0 && + ((PrevColNum > StmtColNum && PrevColNum == CurColNum) || + !Tok.isAtStartOfLine()) && SM.getPresumedLineNumber(StmtLoc) != + SM.getPresumedLineNumber(Tok.getLocation())) { + P.Diag(Tok.getLocation(), diag::warn_misleading_indentation) + << Kind; + P.Diag(StmtLoc, diag::note_previous_statement); + } + } +}; + +} /// ParseIfStatement /// if-statement: [C99 6.8.4.1] @@ -1266,6 +1319,8 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) { // ParseScope InnerScope(this, Scope::DeclScope, C99orCXX, Tok.is(tok::l_brace)); + MisleadingIndentationChecker MIChecker(*this, MSK_if, IfLoc); 
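// The shape of code the checker above is built to diagnose, as a sketch:
//
//   if (cond)
//     step1();
//     step2();   // not part of the 'if'; same column as step1(), so
//                // -Wmisleading-indentation fires with a note at the 'if'
//
// Braced bodies, macro-expanded locations, and statements separated by
// preprocessor directives are skipped, via ShouldSkip, the isMacroID()
// tests, and the NumDirectives comparison above.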
+ // Read the 'then' stmt. SourceLocation ThenStmtLoc = Tok.getLocation(); @@ -1279,6 +1334,9 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) { ThenStmt = ParseStatement(&InnerStatementTrailingElseLoc); } + if (Tok.isNot(tok::kw_else)) + MIChecker.Check(); + // Pop the 'if' scope if needed. InnerScope.Exit(); @@ -1306,12 +1364,17 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) { ParseScope InnerScope(this, Scope::DeclScope, C99orCXX, Tok.is(tok::l_brace)); + MisleadingIndentationChecker MIChecker(*this, MSK_else, ElseLoc); + EnterExpressionEvaluationContext PotentiallyDiscarded( Actions, Sema::ExpressionEvaluationContext::DiscardedStatement, nullptr, Sema::ExpressionEvaluationContextRecord::EK_Other, /*ShouldEnter=*/ConstexprCondition && *ConstexprCondition); ElseStmt = ParseStatement(); + if (ElseStmt.isUsable()) + MIChecker.Check(); + // Pop the 'else' scope if needed. InnerScope.Exit(); } else if (Tok.is(tok::code_completion)) { @@ -1485,9 +1548,13 @@ StmtResult Parser::ParseWhileStatement(SourceLocation *TrailingElseLoc) { // ParseScope InnerScope(this, Scope::DeclScope, C99orCXX, Tok.is(tok::l_brace)); + MisleadingIndentationChecker MIChecker(*this, MSK_while, WhileLoc); + // Read the body statement. StmtResult Body(ParseStatement(TrailingElseLoc)); + if (Body.isUsable()) + MIChecker.Check(); // Pop the body scope if needed. InnerScope.Exit(); WhileScope.Exit(); @@ -1919,9 +1986,14 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { if (C99orCXXorObjC) getCurScope()->decrementMSManglingNumber(); + MisleadingIndentationChecker MIChecker(*this, MSK_for, ForLoc); + // Read the body statement. StmtResult Body(ParseStatement(TrailingElseLoc)); + if (Body.isUsable()) + MIChecker.Check(); + // Pop the body scope if needed. InnerScope.Exit(); diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index d7327eebd95c2..418729a4b2658 100644 --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -2067,9 +2067,21 @@ Parser::TPResult Parser::TryParseFunctionDeclarator() { /// Parser::TPResult Parser::TryParseBracketDeclarator() { ConsumeBracket(); - if (!SkipUntil(tok::r_square, StopAtSemi)) + + // A constant-expression cannot begin with a '{', but the + // expr-or-braced-init-list of a postfix-expression can. + if (Tok.is(tok::l_brace)) + return TPResult::False; + + if (!SkipUntil(tok::r_square, tok::comma, StopAtSemi | StopBeforeMatch)) return TPResult::Error; + // If we hit a comma before the ']', this is not a constant-expression, + // but might still be the expr-or-braced-init-list of a postfix-expression. + if (Tok.isNot(tok::r_square)) + return TPResult::False; + + ConsumeBracket(); return TPResult::Ambiguous; } diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td index 0bd4c51a04c2a..353e0c1d8c8d2 100644 --- a/clang/lib/Sema/OpenCLBuiltins.td +++ b/clang/lib/Sema/OpenCLBuiltins.td @@ -274,14 +274,21 @@ def Event : Type<"Event", QualType<"OCLEventTy">>; def VecAndScalar: IntList<"VecAndScalar", [1, 2, 3, 4, 8, 16]>; def VecNoScalar : IntList<"VecNoScalar", [2, 3, 4, 8, 16]>; def Vec1 : IntList<"Vec1", [1]>; +def Vec2 : IntList<"Vec2", [2]>; +def Vec4 : IntList<"Vec4", [4]>; +def Vec8 : IntList<"Vec8", [8]>; +def Vec16 : IntList<"Vec16", [16]>; def Vec1234 : IntList<"Vec1234", [1, 2, 3, 4]>; // Type lists. 
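// Illustrative OpenCL C usage of the shuffle builtins this file gains
// further below (a sketch, not part of the generated declarations):
//
//   float4 a = (float4)(1.0f, 2.0f, 3.0f, 4.0f);
//   uint8 mask = (uint8)(3, 2, 1, 0, 3, 2, 1, 0);
//   float8 r = shuffle(a, mask);   // result width follows the mask
//
// The mask is always an unsigned integer vector whose element size matches
// the source component size, which is what the TLAllUnsigned list encodes.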
-def TLAll : TypeList<"TLAll", [Char, UChar, Short, UShort, Int, UInt, Long, ULong, Float, Double, Half]>; +def TLAll : TypeList<"TLAll", [Char, UChar, Short, UShort, Int, UInt, Long, ULong, Float, Double, Half]>; +def TLAllUnsigned : TypeList<"TLAllUnsigned", [UChar, UChar, UShort, UShort, UInt, UInt, ULong, ULong, UInt, ULong, UShort]>; def TLFloat : TypeList<"TLFloat", [Float, Double, Half]>; def TLSignedInts : TypeList<"TLSignedInts", [Char, Short, Int, Long]>; def TLUnsignedInts : TypeList<"TLUnsignedInts", [UChar, UShort, UInt, ULong]>; +def TLIntLongFloats : TypeList<"TLIntLongFloats", [Int, UInt, Long, ULong, Float, Double, Half]>; + // All unsigned integer types twice, to facilitate unsigned return types for e.g. // uchar abs(char) and // uchar abs(uchar). @@ -306,6 +313,8 @@ def SGenTypeN : GenericType<"SGenTypeN", TLSignedInts, VecAndScalar def UGenTypeN : GenericType<"UGenTypeN", TLUnsignedInts, VecAndScalar>; // Float def FGenTypeN : GenericType<"FGenTypeN", TLFloat, VecAndScalar>; +// (u)int, (u)long, and all floats +def IntLongFloatGenType1 : GenericType<"IntLongFloatGenType1", TLIntLongFloats, Vec1>; // GenType definitions for every single base type (e.g. fp32 only). // Names are like: GenTypeFloatVecAndScalar. @@ -867,6 +876,31 @@ foreach Type = [Int, UInt] in { } } +//-------------------------------------------------------------------- +// OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions +// --- Table 19 --- +foreach name = ["shuffle"] in { + foreach VSize1 = [Vec2, Vec4, Vec8, Vec16] in { + foreach VSize2 = [Vec2, Vec4, Vec8, Vec16] in { + def : Builtin, + GenericType<"TLAll" # VSize2.Name, TLAll, VSize2>, + GenericType<"TLAllUnsigned" # VSize1.Name, TLAllUnsigned, VSize1>], + Attr.Const>; + } + } +} +foreach name = ["shuffle2"] in { + foreach VSize1 = [Vec2, Vec4, Vec8, Vec16] in { + foreach VSize2 = [Vec2, Vec4, Vec8, Vec16] in { + def : Builtin, + GenericType<"TLAll" # VSize2.Name, TLAll, VSize2>, + GenericType<"TLAll" # VSize2.Name, TLAll, VSize2>, + GenericType<"TLAllUnsigned" # VSize1.Name, TLAllUnsigned, VSize1>], + Attr.Const>; + } + } +} + //-------------------------------------------------------------------- // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14: Image Read and Write Functions // OpenCL Extension v2.0 s5.1.8 and s6.1.8: Image Read and Write Functions @@ -1020,6 +1054,27 @@ foreach aQual = ["WO", "RW"] in { } +//-------------------------------------------------------------------- +// OpenCL v2.0 s6.13.15 - Work-group Functions +// --- Table 26 --- +let MinVersion = CL20 in { + foreach name = ["work_group_all", "work_group_any"] in { + def : Builtin; + } + foreach name = ["work_group_broadcast"] in { + def : Builtin; + def : Builtin; + def : Builtin; + } + foreach op = ["add", "min", "max"] in { + foreach name = ["work_group_reduce_", "work_group_scan_exclusive_", + "work_group_scan_inclusive_"] in { + def : Builtin; + } + } +} + + // OpenCL v2.0 s9.17.3: Additions to section 6.13.1: Work-Item Functions let MinVersion = CL20 in { let Extension = "cl_khr_subgroups" in { diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index a371b7b793ef3..fea48cfced166 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1311,6 +1311,12 @@ NamedDecl *Sema::getCurFunctionOrMethodDecl() { return nullptr; } +LangAS Sema::getDefaultCXXMethodAddrSpace() const { + if (getLangOpts().OpenCL) + return LangAS::opencl_generic; + return LangAS::Default; +} + void Sema::EmitCurrentDiagnostic(unsigned DiagID) { // 
FIXME: It doesn't make sense to me that DiagID is an incoming argument here // and yet we also use the current diag ID on the DiagnosticsEngine. This has diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index efd2ad22dcf12..dca54fddb1193 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3061,8 +3061,37 @@ bool Sema::CheckHexagonBuiltinFunctionCall(unsigned BuiltinID, CheckHexagonBuiltinArgument(BuiltinID, TheCall); } +bool Sema::CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + return CheckMipsBuiltinCpu(BuiltinID, TheCall) || + CheckMipsBuiltinArgument(BuiltinID, TheCall); +} + +bool Sema::CheckMipsBuiltinCpu(unsigned BuiltinID, CallExpr *TheCall) { + const TargetInfo &TI = Context.getTargetInfo(); + + if (Mips::BI__builtin_mips_addu_qb <= BuiltinID && + BuiltinID <= Mips::BI__builtin_mips_lwx) { + if (!TI.hasFeature("dsp")) + return Diag(TheCall->getBeginLoc(), diag::err_mips_builtin_requires_dsp); + } + + if (Mips::BI__builtin_mips_absq_s_qb <= BuiltinID && + BuiltinID <= Mips::BI__builtin_mips_subuh_r_qb) { + if (!TI.hasFeature("dspr2")) + return Diag(TheCall->getBeginLoc(), + diag::err_mips_builtin_requires_dspr2); + } + + if (Mips::BI__builtin_msa_add_a_b <= BuiltinID && + BuiltinID <= Mips::BI__builtin_msa_xori_b) { + if (!TI.hasFeature("msa")) + return Diag(TheCall->getBeginLoc(), diag::err_mips_builtin_requires_msa); + } + + return false; +} -// CheckMipsBuiltinFunctionCall - Checks the constant value passed to the +// CheckMipsBuiltinArgument - Checks the constant value passed to the // intrinsic is correct. The switch statement is ordered by DSP, MSA. The // ordering for DSP is unspecified. MSA is ordered by the data format used // by the underlying instruction i.e., df/m, df/n and then by size. @@ -3071,7 +3100,7 @@ bool Sema::CheckHexagonBuiltinFunctionCall(unsigned BuiltinID, // definitions from include/clang/Basic/BuiltinsMips.def. // FIXME: GCC is strict on signedness for some of these intrinsics, we should // be too. -bool Sema::CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { +bool Sema::CheckMipsBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall) { unsigned i = 0, l = 0, u = 0, m = 0; switch (BuiltinID) { default: return false; @@ -12991,7 +13020,8 @@ class SequenceChecker : public EvaluatedExprVisitor { // expression or statement in the body of the function [and thus before // the value computation of its result]. SequencedSubexpression Sequenced(*this); - Base::VisitCallExpr(CE); + SemaRef.runWithSufficientStackSpace(CE->getExprLoc(), + [&] { Base::VisitCallExpr(CE); }); // FIXME: CXXNewExpr and CXXDeleteExpr implicitly call functions. 
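// What the CheckMipsBuiltinCpu guards above reject, as a sketch (compiling
// for a MIPS target without -mdsp; diagnostic wording approximate):
//
//   typedef signed char v4i8 __attribute__((vector_size(4)));
//   v4i8 f(v4i8 a, v4i8 b) {
//     return __builtin_mips_addu_qb(a, b);  // error: requires 'dsp' feature
//   }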
} @@ -14746,6 +14776,8 @@ void Sema::RefersToMemberWithReducedAlignment( bool AnyIsPacked = false; do { QualType BaseType = ME->getBase()->getType(); + if (BaseType->isDependentType()) + return; if (ME->isArrow()) BaseType = BaseType->getPointeeType(); RecordDecl *RD = BaseType->castAs()->getDecl(); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index ed5c6f878776b..a0fdf77dba51b 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6117,6 +6117,41 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) { return false; } +void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) { + if (Decl->getType().getQualifiers().hasAddressSpace()) + return; + if (VarDecl *Var = dyn_cast(Decl)) { + QualType Type = Var->getType(); + if (Type->isSamplerT() || Type->isVoidType()) + return; + LangAS ImplAS = LangAS::opencl_private; + if ((getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) && + Var->hasGlobalStorage()) + ImplAS = LangAS::opencl_global; + // If the original type from a decayed type is an array type and that array + // type has no address space yet, deduce it now. + if (auto DT = dyn_cast(Type)) { + auto OrigTy = DT->getOriginalType(); + if (!OrigTy.getQualifiers().hasAddressSpace() && OrigTy->isArrayType()) { + // Add the address space to the original array type and then propagate + // that to the element type through `getAsArrayType`. + OrigTy = Context.getAddrSpaceQualType(OrigTy, ImplAS); + OrigTy = QualType(Context.getAsArrayType(OrigTy), 0); + // Re-generate the decayed type. + Type = Context.getDecayedType(OrigTy); + } + } + Type = Context.getAddrSpaceQualType(Type, ImplAS); + // Apply any qualifiers (including address space) from the array type to + // the element type. This implements C99 6.7.3p8: "If the specification of + // an array type includes any type qualifiers, the element type is so + // qualified, not the array type." + if (Type->isArrayType()) + Type = QualType(Context.getAsArrayType(Type), 0); + Decl->setType(Type); + } +} + static void checkAttributesAfterMerging(Sema &S, NamedDecl &ND) { // Ensure that an auto decl is deduced otherwise the checks below might cache // the wrong linkage. @@ -6474,27 +6509,79 @@ static bool isDeclExternC(const Decl *D) { llvm_unreachable("Unknown type of decl!"); } +/// Returns true if there hasn't been any invalid type diagnosed. +static bool diagnoseOpenCLTypes(Scope *S, Sema &Se, Declarator &D, + DeclContext *DC, QualType R) { + // OpenCL v2.0 s6.9.b - Image type can only be used as a function argument. + // OpenCL v2.0 s6.13.16.1 - Pipe type can only be used as a function + // argument. + if (R->isImageType() || R->isPipeType()) { + Se.Diag(D.getIdentifierLoc(), + diag::err_opencl_type_can_only_be_used_as_function_parameter) + << R; + D.setInvalidType(); + return false; + } -NamedDecl *Sema::ActOnVariableDeclarator( - Scope *S, Declarator &D, DeclContext *DC, TypeSourceInfo *TInfo, - LookupResult &Previous, MultiTemplateParamsArg TemplateParamLists, - bool &AddToScope, ArrayRef Bindings) { - QualType R = TInfo->getType(); - DeclarationName Name = GetNameForDeclarator(D).getName(); + // OpenCL v1.2 s6.9.r: + // The event type cannot be used to declare a program scope variable. + // OpenCL v2.0 s6.9.q: + // The clk_event_t and reserve_id_t types cannot be declared in program + // scope. 
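// How the deduction implemented in deduceOpenCLAddressSpace above plays out
// in OpenCL source, as a sketch:
//
//   int g;                  // program scope: deduced __global (OpenCL 2.0+)
//   kernel void k(void) {
//     int l;                // function local: deduced __private
//     float a[4];           // the array element type is qualified as well,
//   }                       // per the C99 6.7.3p8 handling above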
+ if (NULL == S->getParent()) { + if (R->isReserveIDT() || R->isClkEventT() || R->isEventT()) { + Se.Diag(D.getIdentifierLoc(), + diag::err_invalid_type_for_program_scope_var) + << R; + D.setInvalidType(); + return false; + } + } - IdentifierInfo *II = Name.getAsIdentifierInfo(); + // OpenCL v1.0 s6.8.a.3: Pointers to functions are not allowed. + QualType NR = R; + while (NR->isPointerType()) { + if (NR->isFunctionPointerType()) { + Se.Diag(D.getIdentifierLoc(), diag::err_opencl_function_pointer); + D.setInvalidType(); + return false; + } + NR = NR->getPointeeType(); + } - if (D.isDecompositionDeclarator()) { - // Take the name of the first declarator as our name for diagnostic - // purposes. - auto &Decomp = D.getDecompositionDeclarator(); - if (!Decomp.bindings().empty()) { - II = Decomp.bindings()[0].Name; - Name = II; + if (!Se.getOpenCLOptions().isEnabled("cl_khr_fp16")) { + // OpenCL v1.2 s6.1.1.1: reject declaring variables of the half and + // half array type (unless the cl_khr_fp16 extension is enabled). + if (Se.Context.getBaseElementType(R)->isHalfType()) { + Se.Diag(D.getIdentifierLoc(), diag::err_opencl_half_declaration) << R; + D.setInvalidType(); + return false; } - } else if (!II) { - Diag(D.getIdentifierLoc(), diag::err_bad_variable_name) << Name; - return nullptr; + } + + // OpenCL v1.2 s6.9.r: + // The event type cannot be used with the __local, __constant and __global + // address space qualifiers. + if (R->isEventT()) { + if (R.getAddressSpace() != LangAS::opencl_private) { + Se.Diag(D.getBeginLoc(), diag::err_event_t_addr_space_qual); + D.setInvalidType(); + return false; + } + } + + // C++ for OpenCL does not allow the thread_local storage qualifier. + // OpenCL C does not support thread_local either, and + // also reject all other thread storage class specifiers. + DeclSpec::TSCS TSC = D.getDeclSpec().getThreadStorageClassSpec(); + if (TSC != TSCS_unspecified) { + bool IsCXX = Se.getLangOpts().OpenCLCPlusPlus; + Se.Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(), + diag::err_opencl_unknown_type_specifier) + << IsCXX << Se.getLangOpts().getOpenCLVersionTuple().getAsString() + << DeclSpec::getSpecifierName(TSC) << 1; + D.setInvalidType(); + return false; } if (R->isSamplerT()) { @@ -6503,7 +6590,8 @@ NamedDecl *Sema::ActOnVariableDeclarator( // space qualifiers. if (R.getAddressSpace() == LangAS::opencl_local || R.getAddressSpace() == LangAS::opencl_global) { - Diag(D.getIdentifierLoc(), diag::err_wrong_sampler_addressspace); + Se.Diag(D.getIdentifierLoc(), diag::err_wrong_sampler_addressspace); + D.setInvalidType(); } // OpenCL v1.2 s6.12.14.1: @@ -6512,79 +6600,35 @@ NamedDecl *Sema::ActOnVariableDeclarator( if (DC->isTranslationUnit() && !(R.getAddressSpace() == LangAS::opencl_constant || R.isConstQualified())) { - Diag(D.getIdentifierLoc(), diag::err_opencl_nonconst_global_sampler); + Se.Diag(D.getIdentifierLoc(), diag::err_opencl_nonconst_global_sampler); D.setInvalidType(); } + if (D.isInvalidType()) + return false; } + return true; +} - if (getLangOpts().OpenCL) { - // OpenCL v2.0 s6.9.b - Image type can only be used as a function argument. - // OpenCL v2.0 s6.13.16.1 - Pipe type can only be used as a function - // argument. - if (R->isImageType() || R->isPipeType()) { - Diag(D.getIdentifierLoc(), - diag::err_opencl_type_can_only_be_used_as_function_parameter) - << R; - D.setInvalidType(); - return nullptr; - } - - // OpenCL v1.2 s6.9.r: - // The event type cannot be used to declare a program scope variable. 
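// Declarations of the shape these checks reject, as a sketch:
//
//   image2d_t img;          // image/pipe types: function parameters only
//   event_t ev;             // error at program scope (OpenCL v1.2 s6.9.r)
//   int (*fp)(int);         // pointers to functions are not allowed
//   half h;                 // error unless cl_khr_fp16 is enabled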
- // OpenCL v2.0 s6.9.q: - // The clk_event_t and reserve_id_t types cannot be declared in program scope. - if (NULL == S->getParent()) { - if (R->isReserveIDT() || R->isClkEventT() || R->isEventT()) { - Diag(D.getIdentifierLoc(), - diag::err_invalid_type_for_program_scope_var) << R; - D.setInvalidType(); - return nullptr; - } - } - - // OpenCL v1.0 s6.8.a.3: Pointers to functions are not allowed. - QualType NR = R; - while (NR->isPointerType()) { - if (NR->isFunctionPointerType()) { - Diag(D.getIdentifierLoc(), diag::err_opencl_function_pointer); - D.setInvalidType(); - break; - } - NR = NR->getPointeeType(); - } - - if (!getOpenCLOptions().isEnabled("cl_khr_fp16")) { - // OpenCL v1.2 s6.1.1.1: reject declaring variables of the half and - // half array type (unless the cl_khr_fp16 extension is enabled). - if (Context.getBaseElementType(R)->isHalfType()) { - Diag(D.getIdentifierLoc(), diag::err_opencl_half_declaration) << R; - D.setInvalidType(); - } - } +NamedDecl *Sema::ActOnVariableDeclarator( + Scope *S, Declarator &D, DeclContext *DC, TypeSourceInfo *TInfo, + LookupResult &Previous, MultiTemplateParamsArg TemplateParamLists, + bool &AddToScope, ArrayRef Bindings) { + QualType R = TInfo->getType(); + DeclarationName Name = GetNameForDeclarator(D).getName(); - // OpenCL v1.2 s6.9.r: - // The event type cannot be used with the __local, __constant and __global - // address space qualifiers. - if (R->isEventT()) { - if (R.getAddressSpace() != LangAS::opencl_private) { - Diag(D.getBeginLoc(), diag::err_event_t_addr_space_qual); - D.setInvalidType(); - } - } + IdentifierInfo *II = Name.getAsIdentifierInfo(); - // C++ for OpenCL does not allow the thread_local storage qualifier. - // OpenCL C does not support thread_local either, and - // also reject all other thread storage class specifiers. - DeclSpec::TSCS TSC = D.getDeclSpec().getThreadStorageClassSpec(); - if (TSC != TSCS_unspecified) { - bool IsCXX = getLangOpts().OpenCLCPlusPlus; - Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(), - diag::err_opencl_unknown_type_specifier) - << IsCXX << getLangOpts().getOpenCLVersionTuple().getAsString() - << DeclSpec::getSpecifierName(TSC) << 1; - D.setInvalidType(); - return nullptr; + if (D.isDecompositionDeclarator()) { + // Take the name of the first declarator as our name for diagnostic + // purposes. 
+ auto &Decomp = D.getDecompositionDeclarator(); + if (!Decomp.bindings().empty()) { + II = Decomp.bindings()[0].Name; + Name = II; } + } else if (!II) { + Diag(D.getIdentifierLoc(), diag::err_bad_variable_name) << Name; + return nullptr; } DeclSpec::SCS SCSpec = D.getDeclSpec().getStorageClassSpec(); @@ -6950,6 +6994,13 @@ NamedDecl *Sema::ActOnVariableDeclarator( } } + if (getLangOpts().OpenCL) { + + deduceOpenCLAddressSpace(NewVD); + + diagnoseOpenCLTypes(S, *this, D, DC, NewVD->getType()); + } + // Handle attributes prior to checking for duplicates in MergeVarDecl ProcessDeclAttributes(S, NewVD, D); @@ -7017,8 +7068,6 @@ NamedDecl *Sema::ActOnVariableDeclarator( Diag(E->getExprLoc(), diag::err_asm_invalid_global_var_reg) << Label; else if (HasSizeMismatch) Diag(E->getExprLoc(), diag::err_asm_register_size_mismatch) << Label; - else if (!TI.isRegisterReservedGlobally(Label)) - Diag(E->getExprLoc(), diag::err_asm_missing_fixed_reg_opt) << Label; } if (!R->isIntegralType(Context) && !R->isPointerType()) { @@ -11289,6 +11338,9 @@ bool Sema::DeduceVariableDeclarationType(VarDecl *VDecl, bool DirectInit, if (getLangOpts().ObjCAutoRefCount && inferObjCARCLifetime(VDecl)) VDecl->setInvalidDecl(); + if (getLangOpts().OpenCL) + deduceOpenCLAddressSpace(VDecl); + // If this is a redeclaration, check that the type we just deduced matches // the previously declared type. if (VarDecl *Old = VDecl->getPreviousDecl()) { @@ -13111,6 +13163,10 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D) { if (New->hasAttr()) { Diag(New->getLocation(), diag::err_block_on_nonlocal); } + + if (getLangOpts().OpenCL) + deduceOpenCLAddressSpace(New); + return New; } diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 8c434be523172..fa6a2e92b8cbb 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -11417,10 +11417,9 @@ void Sema::setupImplicitSpecialMemberType(CXXMethodDecl *SpecialMem, // Build an exception specification pointing back at this constructor. FunctionProtoType::ExtProtoInfo EPI = getImplicitMethodEPI(*this, SpecialMem); - if (getLangOpts().OpenCLCPlusPlus) { - // OpenCL: Implicitly defaulted special member are of the generic address - // space. - EPI.TypeQuals.addAddressSpace(LangAS::opencl_generic); + LangAS AS = getDefaultCXXMethodAddrSpace(); + if (AS != LangAS::Default) { + EPI.TypeQuals.addAddressSpace(AS); } auto QT = Context.getFunctionType(ResultTy, Args, EPI); @@ -11830,7 +11829,7 @@ void Sema::ActOnFinishCXXMemberDecls() { } } -void Sema::ActOnFinishCXXNonNestedClass(Decl *D) { +void Sema::ActOnFinishCXXNonNestedClass() { referenceDLLExportedClassMethods(); if (!DelayedDllExportMemberFunctions.empty()) { @@ -12330,8 +12329,9 @@ CXXMethodDecl *Sema::DeclareImplicitCopyAssignment(CXXRecordDecl *ClassDecl) { return nullptr; QualType ArgType = Context.getTypeDeclType(ClassDecl); - if (Context.getLangOpts().OpenCLCPlusPlus) - ArgType = Context.getAddrSpaceQualType(ArgType, LangAS::opencl_generic); + LangAS AS = getDefaultCXXMethodAddrSpace(); + if (AS != LangAS::Default) + ArgType = Context.getAddrSpaceQualType(ArgType, AS); QualType RetType = Context.getLValueReferenceType(ArgType); bool Const = ClassDecl->implicitCopyAssignmentHasConstParam(); if (Const) @@ -12406,8 +12406,7 @@ static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp) { // In Microsoft mode, assignment operations don't affect constructors and // vice versa. 
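// With the relaxed check below, a user-declared destructor is chosen as the
// deprecation witness even when it is defaulted; the diagnostic itself still
// requires isUserProvided(). A sketch of the diagnosed pattern:
//
//   struct S {
//     ~S();                  // user-provided destructor
//   };
//   void g(S a) { S b = a; } // implicit copy ctor: -Wdeprecated-copy-dtor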
-    if (RD->hasUserDeclaredDestructor() &&
-        RD->getDestructor()->isUserProvided()) {
+    if (RD->hasUserDeclaredDestructor()) {
       UserDeclaredOperation = RD->getDestructor();
     } else if (!isa<CXXConstructorDecl>(CopyOp) &&
                RD->hasUserDeclaredCopyConstructor() &&
@@ -12435,9 +12434,10 @@ static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp) {
   if (UserDeclaredOperation && UserDeclaredOperation->isUserProvided()) {
     S.Diag(UserDeclaredOperation->getLocation(),
-         diag::warn_deprecated_copy_operation)
-      << RD << /*copy assignment*/!isa<CXXConstructorDecl>(CopyOp)
-      << /*destructor*/isa<CXXDestructorDecl>(UserDeclaredOperation);
+           isa<CXXDestructorDecl>(UserDeclaredOperation)
+               ? diag::warn_deprecated_copy_dtor_operation
+               : diag::warn_deprecated_copy_operation)
+        << RD << /*copy assignment*/ !isa<CXXConstructorDecl>(CopyOp);
   }
 }
 
@@ -12656,8 +12656,9 @@ CXXMethodDecl *Sema::DeclareImplicitMoveAssignment(CXXRecordDecl *ClassDecl) {
   // constructor rules.
 
   QualType ArgType = Context.getTypeDeclType(ClassDecl);
-  if (Context.getLangOpts().OpenCLCPlusPlus)
-    ArgType = Context.getAddrSpaceQualType(ArgType, LangAS::opencl_generic);
+  LangAS AS = getDefaultCXXMethodAddrSpace();
+  if (AS != LangAS::Default)
+    ArgType = Context.getAddrSpaceQualType(ArgType, AS);
   QualType RetType = Context.getLValueReferenceType(ArgType);
   ArgType = Context.getRValueReferenceType(ArgType);
 
@@ -13034,8 +13035,9 @@ CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor(
   if (Const)
     ArgType = ArgType.withConst();
 
-  if (Context.getLangOpts().OpenCLCPlusPlus)
-    ArgType = Context.getAddrSpaceQualType(ArgType, LangAS::opencl_generic);
+  LangAS AS = getDefaultCXXMethodAddrSpace();
+  if (AS != LangAS::Default)
+    ArgType = Context.getAddrSpaceQualType(ArgType, AS);
 
   ArgType = Context.getLValueReferenceType(ArgType);
 
@@ -13166,8 +13168,9 @@ CXXConstructorDecl *Sema::DeclareImplicitMoveConstructor(
   QualType ClassType = Context.getTypeDeclType(ClassDecl);
 
   QualType ArgType = ClassType;
-  if (Context.getLangOpts().OpenCLCPlusPlus)
-    ArgType = Context.getAddrSpaceQualType(ClassType, LangAS::opencl_generic);
+  LangAS AS = getDefaultCXXMethodAddrSpace();
+  if (AS != LangAS::Default)
+    ArgType = Context.getAddrSpaceQualType(ClassType, AS);
   ArgType = Context.getRValueReferenceType(ArgType);
 
   bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 48d3e69c5bc7a..f01f03d756696 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -13103,6 +13103,15 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
   if (ResultTy.isNull() || LHS.isInvalid() || RHS.isInvalid())
     return ExprError();
 
+  if (ResultTy->isRealFloatingType() &&
+      (getLangOpts().getFPRoundingMode() != LangOptions::FPR_ToNearest ||
+       getLangOpts().getFPExceptionMode() != LangOptions::FPE_Ignore)) {
+    // Mark the current function as using floating point constrained
+    // intrinsics.
+    if (FunctionDecl *F = dyn_cast<FunctionDecl>(CurContext))
+      F->setUsesFPIntrin(true);
+  }
+
   // Some of the binary operations require promoting operands of half vector to
   // float vectors and truncating the result back to half vector.
For now, we do // this only when HalfArgsAndReturn is set (that is, when the target is arm or diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index c1812922cc034..ee17f826c7527 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -4105,9 +4105,26 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, << From->getSourceRange(); } + // Defer address space conversion to the third conversion. + QualType FromPteeType = From->getType()->getPointeeType(); + QualType ToPteeType = ToType->getPointeeType(); + QualType NewToType = ToType; + if (!FromPteeType.isNull() && !ToPteeType.isNull() && + FromPteeType.getAddressSpace() != ToPteeType.getAddressSpace()) { + NewToType = Context.removeAddrSpaceQualType(ToPteeType); + NewToType = Context.getAddrSpaceQualType(NewToType, + FromPteeType.getAddressSpace()); + if (ToType->isObjCObjectPointerType()) + NewToType = Context.getObjCObjectPointerType(NewToType); + else if (ToType->isBlockPointerType()) + NewToType = Context.getBlockPointerType(NewToType); + else + NewToType = Context.getPointerType(NewToType); + } + CastKind Kind; CXXCastPath BasePath; - if (CheckPointerConversion(From, ToType, Kind, BasePath, CStyle)) + if (CheckPointerConversion(From, NewToType, Kind, BasePath, CStyle)) return ExprError(); // Make sure we extend blocks if necessary. @@ -4118,8 +4135,8 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, From = E.get(); } if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers()) - CheckObjCConversion(SourceRange(), ToType, From, CCK); - From = ImpCastExprToType(From, ToType, Kind, VK_RValue, &BasePath, CCK) + CheckObjCConversion(SourceRange(), NewToType, From, CCK); + From = ImpCastExprToType(From, NewToType, Kind, VK_RValue, &BasePath, CCK) .get(); break; } diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index fbc8e8e5d23d0..065fd672a194e 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -6653,6 +6653,7 @@ struct IndirectLocalPathEntry { VarInit, LValToRVal, LifetimeBoundCall, + GslReferenceInit, GslPointerInit } Kind; Expr *E; @@ -6783,12 +6784,24 @@ static bool shouldTrackFirstArgument(const FunctionDecl *FD) { static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call, LocalVisitor Visit) { - auto VisitPointerArg = [&](const Decl *D, Expr *Arg) { + auto VisitPointerArg = [&](const Decl *D, Expr *Arg, bool Value) { // We are not interested in the temporary base objects of gsl Pointers: // Temp().ptr; // Here ptr might not dangle. if (isa(Arg->IgnoreImpCasts())) return; - Path.push_back({IndirectLocalPathEntry::GslPointerInit, Arg, D}); + // Once we initialized a value with a reference, it can no longer dangle. + if (!Value) { + for (auto It = Path.rbegin(), End = Path.rend(); It != End; ++It) { + if (It->Kind == IndirectLocalPathEntry::GslReferenceInit) + continue; + if (It->Kind == IndirectLocalPathEntry::GslPointerInit) + return; + break; + } + } + Path.push_back({Value ? 
IndirectLocalPathEntry::GslPointerInit + : IndirectLocalPathEntry::GslReferenceInit, + Arg, D}); if (Arg->isGLValue()) visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding, Visit, @@ -6802,18 +6815,21 @@ static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call, if (auto *MCE = dyn_cast(Call)) { const auto *MD = cast_or_null(MCE->getDirectCallee()); if (MD && shouldTrackImplicitObjectArg(MD)) - VisitPointerArg(MD, MCE->getImplicitObjectArgument()); + VisitPointerArg(MD, MCE->getImplicitObjectArgument(), + !MD->getReturnType()->isReferenceType()); return; } else if (auto *OCE = dyn_cast(Call)) { FunctionDecl *Callee = OCE->getDirectCallee(); if (Callee && Callee->isCXXInstanceMember() && shouldTrackImplicitObjectArg(cast(Callee))) - VisitPointerArg(Callee, OCE->getArg(0)); + VisitPointerArg(Callee, OCE->getArg(0), + !Callee->getReturnType()->isReferenceType()); return; } else if (auto *CE = dyn_cast(Call)) { FunctionDecl *Callee = CE->getDirectCallee(); if (Callee && shouldTrackFirstArgument(Callee)) - VisitPointerArg(Callee, CE->getArg(0)); + VisitPointerArg(Callee, CE->getArg(0), + !Callee->getReturnType()->isReferenceType()); return; } @@ -6821,7 +6837,7 @@ static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call, const auto *Ctor = CCE->getConstructor(); const CXXRecordDecl *RD = Ctor->getParent(); if (CCE->getNumArgs() > 0 && RD->hasAttr()) - VisitPointerArg(Ctor->getParamDecl(0), CCE->getArgs()[0]); + VisitPointerArg(Ctor->getParamDecl(0), CCE->getArgs()[0], true); } } @@ -7287,6 +7303,7 @@ static SourceRange nextPathEntryRange(const IndirectLocalPath &Path, unsigned I, case IndirectLocalPathEntry::AddressOf: case IndirectLocalPathEntry::LValToRVal: case IndirectLocalPathEntry::LifetimeBoundCall: + case IndirectLocalPathEntry::GslReferenceInit: case IndirectLocalPathEntry::GslPointerInit: // These exist primarily to mark the path as not permitting or // supporting lifetime extension. @@ -7309,7 +7326,8 @@ static bool pathOnlyInitializesGslPointer(IndirectLocalPath &Path) { continue; if (It->Kind == IndirectLocalPathEntry::AddressOf) continue; - return It->Kind == IndirectLocalPathEntry::GslPointerInit; + return It->Kind == IndirectLocalPathEntry::GslPointerInit || + It->Kind == IndirectLocalPathEntry::GslReferenceInit; } return false; } @@ -7532,6 +7550,7 @@ void Sema::checkInitializerLifetime(const InitializedEntity &Entity, case IndirectLocalPathEntry::LifetimeBoundCall: case IndirectLocalPathEntry::GslPointerInit: + case IndirectLocalPathEntry::GslReferenceInit: // FIXME: Consider adding a note for these. 
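// The value/reference distinction recorded above, as a sketch:
//
//   std::vector<int> v;
//   auto it = v.begin();        // gsl::Pointer returned by value: tracked
//                               // as GslPointerInit, may dangle
//   const int &e = *v.begin();  // reference result: GslReferenceInit, which
//                               // the backward scan treats as non-dangling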
break; diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index c6b19a0b195c5..14b443e9dac08 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -917,6 +917,10 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, /*IsVariadic=*/false, /*IsCXXMethod=*/true)); EPI.HasTrailingReturn = true; EPI.TypeQuals.addConst(); + LangAS AS = getDefaultCXXMethodAddrSpace(); + if (AS != LangAS::Default) + EPI.TypeQuals.addAddressSpace(AS); + // C++1y [expr.prim.lambda]: // The lambda return type is 'auto', which is replaced by the // trailing-return type if provided and/or deduced from 'return' diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index df817e6bcef13..d9b6cb6a92153 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -823,7 +823,8 @@ static void InsertOCLBuiltinDeclarationsFromTable(Sema &S, LookupResult &LR, NewOpenCLBuiltin->addAttr(ConstAttr::CreateImplicit(Context)); if (OpenCLBuiltin.IsConv) NewOpenCLBuiltin->addAttr(ConvergentAttr::CreateImplicit(Context)); - if ((GenTypeMaxCnt > 1 || Len > 1) && !S.getLangOpts().OpenCLCPlusPlus) + + if (!S.getLangOpts().OpenCLCPlusPlus) NewOpenCLBuiltin->addAttr(OverloadableAttr::CreateImplicit(Context)); LR.addDecl(NewOpenCLBuiltin); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 59178fb671fb0..2523d7edc3e7d 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4538,6 +4538,8 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPParallelForSimdDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_parallel); + if (LangOpts.OpenMP >= 50) + AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_parallel_sections: Res = ActOnOpenMPParallelSectionsDirective(ClausesWithImplicit, AStmt, @@ -4646,6 +4648,8 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPTaskLoopSimdDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_taskloop); + if (LangOpts.OpenMP >= 50) + AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_master_taskloop: Res = ActOnOpenMPMasterTaskLoopDirective( @@ -5443,7 +5447,7 @@ void Sema::markOpenMPDeclareVariantFuncsReferenced(SourceLocation Loc, Func->specific_attrs()) { // TODO: add checks for active OpenMP context where possible. Expr *VariantRef = A->getVariantFuncRef(); - auto *DRE = dyn_cast(VariantRef->IgnoreParenImpCasts()); + auto *DRE = cast(VariantRef->IgnoreParenImpCasts()); auto *F = cast(DRE->getDecl()); if (!F->isDefined() && F->isTemplateInstantiation()) InstantiateFunctionDefinition(Loc, F->getFirstDecl()); @@ -10642,7 +10646,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, // A return value of OMPD_unknown signifies that the expression should not // be captured. 
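// OpenMP 5.0 spelling the new 'simd' name-modifier handling enables on the
// composite directives above, as a sketch:
//
//   #pragma omp parallel for simd if(simd: n > 64)
//   for (int i = 0; i < n; ++i)
//     a[i] += b[i];
//
// Under OpenMP <= 4.5 the modifier is not accepted on these directives,
// matching the LangOpts.OpenMP >= 50 guards.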
static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( - OpenMPDirectiveKind DKind, OpenMPClauseKind CKind, + OpenMPDirectiveKind DKind, OpenMPClauseKind CKind, unsigned OpenMPVersion, OpenMPDirectiveKind NameModifier = OMPD_unknown) { OpenMPDirectiveKind CaptureRegion = OMPD_unknown; switch (CKind) { @@ -10677,11 +10681,22 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( if (NameModifier == OMPD_unknown || NameModifier == OMPD_taskloop) CaptureRegion = OMPD_parallel; break; + case OMPD_parallel_for_simd: + if (OpenMPVersion <= 45) + break; + if (NameModifier == OMPD_unknown || NameModifier == OMPD_simd) + CaptureRegion = OMPD_parallel; + break; + case OMPD_taskloop_simd: + if (OpenMPVersion <= 45) + break; + if (NameModifier == OMPD_unknown || NameModifier == OMPD_simd) + CaptureRegion = OMPD_taskloop; + break; case OMPD_cancel: case OMPD_parallel: case OMPD_parallel_sections: case OMPD_parallel_for: - case OMPD_parallel_for_simd: case OMPD_target: case OMPD_target_simd: case OMPD_target_teams: @@ -10691,7 +10706,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_distribute_parallel_for_simd: case OMPD_task: case OMPD_taskloop: - case OMPD_taskloop_simd: case OMPD_master_taskloop: case OMPD_master_taskloop_simd: case OMPD_target_data: @@ -11306,8 +11320,8 @@ OMPClause *Sema::ActOnOpenMPIfClause(OpenMPDirectiveKind NameModifier, ValExpr = Val.get(); OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); - CaptureRegion = - getOpenMPCaptureRegionForClause(DKind, OMPC_if, NameModifier); + CaptureRegion = getOpenMPCaptureRegionForClause( + DKind, OMPC_if, LangOpts.OpenMP, NameModifier); if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; @@ -11338,7 +11352,8 @@ OMPClause *Sema::ActOnOpenMPFinalClause(Expr *Condition, ValExpr = MakeFullExpr(Val.get()).get(); OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); - CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_final); + CaptureRegion = + getOpenMPCaptureRegionForClause(DKind, OMPC_final, LangOpts.OpenMP); if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; @@ -11423,7 +11438,8 @@ isNonNegativeIntegerValue(Expr *&ValExpr, Sema &SemaRef, OpenMPClauseKind CKind, } if (!BuildCapture) return true; - *CaptureRegion = getOpenMPCaptureRegionForClause(DKind, CKind); + *CaptureRegion = + getOpenMPCaptureRegionForClause(DKind, CKind, SemaRef.LangOpts.OpenMP); if (*CaptureRegion != OMPD_unknown && !SemaRef.CurContext->isDependentContext()) { ValExpr = SemaRef.MakeFullExpr(ValExpr).get(); @@ -11450,7 +11466,7 @@ OMPClause *Sema::ActOnOpenMPNumThreadsClause(Expr *NumThreads, OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); OpenMPDirectiveKind CaptureRegion = - getOpenMPCaptureRegionForClause(DKind, OMPC_num_threads); + getOpenMPCaptureRegionForClause(DKind, OMPC_num_threads, LangOpts.OpenMP); if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; @@ -11981,8 +11997,8 @@ OMPClause *Sema::ActOnOpenMPScheduleClause( return nullptr; } } else if (getOpenMPCaptureRegionForClause( - DSAStack->getCurrentDirective(), OMPC_schedule) != - OMPD_unknown && + DSAStack->getCurrentDirective(), OMPC_schedule, + LangOpts.OpenMP) != OMPD_unknown && !CurContext->isDependentContext()) { ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector 
Captures; @@ -14818,7 +14834,7 @@ OMPClause *Sema::ActOnOpenMPDeviceClause(Expr *Device, SourceLocation StartLoc, OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); OpenMPDirectiveKind CaptureRegion = - getOpenMPCaptureRegionForClause(DKind, OMPC_device); + getOpenMPCaptureRegionForClause(DKind, OMPC_device, LangOpts.OpenMP); if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; @@ -16270,7 +16286,7 @@ OMPClause *Sema::ActOnOpenMPNumTeamsClause(Expr *NumTeams, OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); OpenMPDirectiveKind CaptureRegion = - getOpenMPCaptureRegionForClause(DKind, OMPC_num_teams); + getOpenMPCaptureRegionForClause(DKind, OMPC_num_teams, LangOpts.OpenMP); if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; @@ -16296,8 +16312,8 @@ OMPClause *Sema::ActOnOpenMPThreadLimitClause(Expr *ThreadLimit, return nullptr; OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); - OpenMPDirectiveKind CaptureRegion = - getOpenMPCaptureRegionForClause(DKind, OMPC_thread_limit); + OpenMPDirectiveKind CaptureRegion = getOpenMPCaptureRegionForClause( + DKind, OMPC_thread_limit, LangOpts.OpenMP); if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; @@ -16422,8 +16438,8 @@ OMPClause *Sema::ActOnOpenMPDistScheduleClause( return nullptr; } } else if (getOpenMPCaptureRegionForClause( - DSAStack->getCurrentDirective(), OMPC_dist_schedule) != - OMPD_unknown && + DSAStack->getCurrentDirective(), OMPC_dist_schedule, + LangOpts.OpenMP) != OMPD_unknown && !CurContext->isDependentContext()) { ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector Captures; diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index a85fb6c1dc833..e800f7fe74248 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -7004,15 +7004,21 @@ Sema::BuildExpressionFromDeclTemplateArgument(const TemplateArgument &Arg, // We might need to perform a trailing qualification conversion, since // the element type on the parameter could be more qualified than the - // element type in the expression we constructed. + // element type in the expression we constructed, and likewise for a + // function conversion. bool ObjCLifetimeConversion; - if (IsQualificationConversion(((Expr*) RefExpr.get())->getType(), + QualType Ignored; + if (IsFunctionConversion(RefExpr.get()->getType(), ParamType, Ignored) || + IsQualificationConversion(RefExpr.get()->getType(), ParamType.getUnqualifiedType(), false, ObjCLifetimeConversion)) - RefExpr = ImpCastExprToType(RefExpr.get(), ParamType.getUnqualifiedType(), CK_NoOp); + RefExpr = ImpCastExprToType(RefExpr.get(), + ParamType.getUnqualifiedType(), CK_NoOp); + // FIXME: We need to perform derived-to-base or base-to-derived + // pointer-to-member conversions here too. 
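// The case the added IsFunctionConversion check accepts, as a sketch:
//
//   void f() noexcept;
//   template <void (*F)()> struct A {};
//   A<&f> a;   // OK in C++17: dropping noexcept is a function pointer
//              // conversion performed on the template argument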
assert(!RefExpr.isInvalid() && - Context.hasSameType(((Expr*) RefExpr.get())->getType(), + Context.hasSameType(RefExpr.get()->getType(), ParamType.getUnqualifiedType())); return RefExpr; } diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index a67ce648e4057..0ed4b8c86803b 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1576,8 +1576,12 @@ TemplateInstantiator::TransformFunctionTypeParam(ParmVarDecl *OldParm, int indexAdjustment, Optional NumExpansions, bool ExpectParameterPack) { - return SemaRef.SubstParmVarDecl(OldParm, TemplateArgs, indexAdjustment, - NumExpansions, ExpectParameterPack); + auto NewParm = + SemaRef.SubstParmVarDecl(OldParm, TemplateArgs, indexAdjustment, + NumExpansions, ExpectParameterPack); + if (NewParm && SemaRef.getLangOpts().OpenCL) + SemaRef.deduceOpenCLAddressSpace(NewParm); + return NewParm; } QualType @@ -2284,8 +2288,10 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation, CheckCompletedCXXClass(Instantiation); // Default arguments are parsed, if not instantiated. We can go instantiate - // default arg exprs for default constructors if necessary now. - ActOnFinishCXXNonNestedClass(Instantiation); + // default arg exprs for default constructors if necessary now. Unless we're + // parsing a class, in which case wait until that's finished. + if (ParsingClassDepth == 0) + ActOnFinishCXXNonNestedClass(); // Instantiate late parsed attributes, and attach them to their decls. // See Sema::InstantiateAttrs diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index c879667333d02..8686341d65f75 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -931,6 +931,9 @@ Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D, SemaRef.inferObjCARCLifetime(Var)) Var->setInvalidDecl(); + if (SemaRef.getLangOpts().OpenCL) + SemaRef.deduceOpenCLAddressSpace(Var); + // Substitute the nested name specifier, if any. if (SubstQualifier(D, Var)) return nullptr; @@ -3070,7 +3073,9 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( } if (SubstReductionType.isNull()) return nullptr; - bool IsCorrect = !SubstReductionType.isNull(); + Expr *Combiner = D->getCombiner(); + Expr *Init = D->getInitializer(); + bool IsCorrect = true; // Create instantiated copy. std::pair ReductionTypes[] = { std::make_pair(SubstReductionType, D->getLocation())}; @@ -3085,23 +3090,10 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( PrevDeclInScope); auto *NewDRD = cast(DRD.get().getSingleDecl()); SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewDRD); - if (!RequiresInstantiation) { - if (Expr *Combiner = D->getCombiner()) { - NewDRD->setCombinerData(D->getCombinerIn(), D->getCombinerOut()); - NewDRD->setCombiner(Combiner); - if (Expr *Init = D->getInitializer()) { - NewDRD->setInitializerData(D->getInitOrig(), D->getInitPriv()); - NewDRD->setInitializer(Init, D->getInitializerKind()); - } - } - (void)SemaRef.ActOnOpenMPDeclareReductionDirectiveEnd( - /*S=*/nullptr, DRD, IsCorrect && !D->isInvalidDecl()); - return NewDRD; - } Expr *SubstCombiner = nullptr; Expr *SubstInitializer = nullptr; // Combiners instantiation sequence. 
- if (D->getCombiner()) { + if (Combiner) { SemaRef.ActOnOpenMPDeclareReductionCombinerStart( /*S=*/nullptr, NewDRD); SemaRef.CurrentInstantiationScope->InstantiatedLocal( @@ -3113,46 +3105,41 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( auto *ThisContext = dyn_cast_or_null(Owner); Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(), ThisContext); - SubstCombiner = SemaRef.SubstExpr(D->getCombiner(), TemplateArgs).get(); + SubstCombiner = SemaRef.SubstExpr(Combiner, TemplateArgs).get(); SemaRef.ActOnOpenMPDeclareReductionCombinerEnd(NewDRD, SubstCombiner); - // Initializers instantiation sequence. - if (D->getInitializer()) { - VarDecl *OmpPrivParm = - SemaRef.ActOnOpenMPDeclareReductionInitializerStart( - /*S=*/nullptr, NewDRD); - SemaRef.CurrentInstantiationScope->InstantiatedLocal( - cast(D->getInitOrig())->getDecl(), - cast(NewDRD->getInitOrig())->getDecl()); - SemaRef.CurrentInstantiationScope->InstantiatedLocal( - cast(D->getInitPriv())->getDecl(), - cast(NewDRD->getInitPriv())->getDecl()); - if (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit) { - SubstInitializer = - SemaRef.SubstExpr(D->getInitializer(), TemplateArgs).get(); - } else { - auto *OldPrivParm = - cast(cast(D->getInitPriv())->getDecl()); - IsCorrect = IsCorrect && OldPrivParm->hasInit(); - if (IsCorrect) - SemaRef.InstantiateVariableInitializer(OmpPrivParm, OldPrivParm, - TemplateArgs); - } - SemaRef.ActOnOpenMPDeclareReductionInitializerEnd( - NewDRD, SubstInitializer, OmpPrivParm); - } - IsCorrect = - IsCorrect && SubstCombiner && - (!D->getInitializer() || - (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit && - SubstInitializer) || - (D->getInitializerKind() != OMPDeclareReductionDecl::CallInit && - !SubstInitializer && !SubstInitializer)); - } else { - IsCorrect = false; } - - (void)SemaRef.ActOnOpenMPDeclareReductionDirectiveEnd(/*S=*/nullptr, DRD, - IsCorrect); + // Initializers instantiation sequence. 
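// A templated declare-reduction that takes this instantiation path, as a
// sketch; the combiner and initializer are now substituted unconditionally
// instead of being copied when no instantiation was required:
//
//   template <typename T>
//   T sum(T *v, int n) {
//   #pragma omp declare reduction(merge : T : omp_out += omp_in) \
//       initializer(omp_priv = T())
//     T s = T();
//   #pragma omp parallel for reduction(merge : s)
//     for (int i = 0; i < n; ++i)
//       s += v[i];
//     return s;
//   }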
+ if (Init) { + VarDecl *OmpPrivParm = SemaRef.ActOnOpenMPDeclareReductionInitializerStart( + /*S=*/nullptr, NewDRD); + SemaRef.CurrentInstantiationScope->InstantiatedLocal( + cast(D->getInitOrig())->getDecl(), + cast(NewDRD->getInitOrig())->getDecl()); + SemaRef.CurrentInstantiationScope->InstantiatedLocal( + cast(D->getInitPriv())->getDecl(), + cast(NewDRD->getInitPriv())->getDecl()); + if (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit) { + SubstInitializer = SemaRef.SubstExpr(Init, TemplateArgs).get(); + } else { + auto *OldPrivParm = + cast(cast(D->getInitPriv())->getDecl()); + IsCorrect = IsCorrect && OldPrivParm->hasInit(); + if (IsCorrect) + SemaRef.InstantiateVariableInitializer(OmpPrivParm, OldPrivParm, + TemplateArgs); + } + SemaRef.ActOnOpenMPDeclareReductionInitializerEnd(NewDRD, SubstInitializer, + OmpPrivParm); + } + IsCorrect = IsCorrect && SubstCombiner && + (!Init || + (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit && + SubstInitializer) || + (D->getInitializerKind() != OMPDeclareReductionDecl::CallInit && + !SubstInitializer)); + + (void)SemaRef.ActOnOpenMPDeclareReductionDirectiveEnd( + /*S=*/nullptr, DRD, IsCorrect && !D->isInvalidDecl()); return NewDRD; } diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 5f3b2d5600d6d..7de04e1228d4b 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1983,6 +1983,19 @@ bool Sema::CheckQualifiedFunctionForTypeId(QualType T, SourceLocation Loc) { return true; } +// Helper to deduce addr space of a pointee type in OpenCL mode. +static QualType deduceOpenCLPointeeAddrSpace(Sema &S, QualType PointeeType) { + if (!PointeeType->isUndeducedAutoType() && !PointeeType->isDependentType() && + !PointeeType->isSamplerT() && + !PointeeType.getQualifiers().hasAddressSpace()) + PointeeType = S.getASTContext().getAddrSpaceQualType( + PointeeType, + S.getLangOpts().OpenCLCPlusPlus || S.getLangOpts().OpenCLVersion == 200 + ? LangAS::opencl_generic + : LangAS::opencl_private); + return PointeeType; +} + /// Build a pointer type. /// /// \param T The type to which we'll be building a pointer. @@ -2019,6 +2032,9 @@ QualType Sema::BuildPointerType(QualType T, if (getLangOpts().ObjCAutoRefCount) T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ false); + if (getLangOpts().OpenCL) + T = deduceOpenCLPointeeAddrSpace(*this, T); + // Build the pointer type. return Context.getPointerType(T); } @@ -2079,6 +2095,9 @@ QualType Sema::BuildReferenceType(QualType T, bool SpelledAsLValue, if (getLangOpts().ObjCAutoRefCount) T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ true); + if (getLangOpts().OpenCL) + T = deduceOpenCLPointeeAddrSpace(*this, T); + // Handle restrict on references. if (LValueRef) return Context.getLValueReferenceType(T, SpelledAsLValue); @@ -2664,6 +2683,9 @@ QualType Sema::BuildBlockPointerType(QualType T, if (checkQualifiedFunction(*this, T, Loc, QFK_BlockPointer)) return QualType(); + if (getLangOpts().OpenCL) + T = deduceOpenCLPointeeAddrSpace(*this, T); + return Context.getBlockPointerType(T); } @@ -4808,6 +4830,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = EI; EPI.Variadic = FTI.isVariadic; + EPI.EllipsisLoc = FTI.getEllipsisLoc(); EPI.HasTrailingReturn = FTI.hasTrailingReturnType(); EPI.TypeQuals.addCVRUQualifiers( FTI.MethodQualifiers ? 
FTI.MethodQualifiers->getTypeQualifiers() @@ -4947,7 +4970,9 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, .getScopeRep() ->getKind() == NestedNameSpecifier::TypeSpec) || state.getDeclarator().getContext() == - DeclaratorContext::MemberContext; + DeclaratorContext::MemberContext || + state.getDeclarator().getContext() == + DeclaratorContext::LambdaExprContext; }; if (state.getSema().getLangOpts().OpenCLCPlusPlus && IsClassMember()) { @@ -4966,7 +4991,8 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, // If a class member function's address space is not set, set it to // __generic. LangAS AS = - (ASIdx == LangAS::Default ? LangAS::opencl_generic : ASIdx); + (ASIdx == LangAS::Default ? S.getDefaultCXXMethodAddrSpace() + : ASIdx); EPI.TypeQuals.addAddressSpace(AS); } T = Context.getFunctionType(T, ParamTys, EPI); @@ -7472,137 +7498,6 @@ static void HandleOpenCLAccessAttr(QualType &CurType, const ParsedAttr &Attr, } } -static void deduceOpenCLImplicitAddrSpace(TypeProcessingState &State, - QualType &T, TypeAttrLocation TAL) { - Declarator &D = State.getDeclarator(); - - // Handle the cases where address space should not be deduced. - // - // The pointee type of a pointer type is always deduced since a pointer always - // points to some memory location which should has an address space. - // - // There are situations that at the point of certain declarations, the address - // space may be unknown and better to be left as default. For example, when - // defining a typedef or struct type, they are not associated with any - // specific address space. Later on, they may be used with any address space - // to declare a variable. - // - // The return value of a function is r-value, therefore should not have - // address space. - // - // The void type does not occupy memory, therefore should not have address - // space, except when it is used as a pointee type. - // - // Since LLVM assumes function type is in default address space, it should not - // have address space. - auto ChunkIndex = State.getCurrentChunkIndex(); - bool IsPointee = - ChunkIndex > 0 && - (D.getTypeObject(ChunkIndex - 1).Kind == DeclaratorChunk::Pointer || - D.getTypeObject(ChunkIndex - 1).Kind == DeclaratorChunk::Reference || - D.getTypeObject(ChunkIndex - 1).Kind == DeclaratorChunk::BlockPointer); - // For pointers/references to arrays the next chunk is always an array - // followed by any number of parentheses. - if (!IsPointee && ChunkIndex > 1) { - auto AdjustedCI = ChunkIndex - 1; - if (D.getTypeObject(AdjustedCI).Kind == DeclaratorChunk::Array) - AdjustedCI--; - // Skip over all parentheses. - while (AdjustedCI > 0 && - D.getTypeObject(AdjustedCI).Kind == DeclaratorChunk::Paren) - AdjustedCI--; - if (D.getTypeObject(AdjustedCI).Kind == DeclaratorChunk::Pointer || - D.getTypeObject(AdjustedCI).Kind == DeclaratorChunk::Reference) - IsPointee = true; - } - bool IsFuncReturnType = - ChunkIndex > 0 && - D.getTypeObject(ChunkIndex - 1).Kind == DeclaratorChunk::Function; - bool IsFuncType = - ChunkIndex < D.getNumTypeObjects() && - D.getTypeObject(ChunkIndex).Kind == DeclaratorChunk::Function; - if ( // Do not deduce addr space for function return type and function type, - // otherwise it will fail some sema check. - IsFuncReturnType || IsFuncType || - // Do not deduce addr space for member types of struct, except the pointee - // type of a pointer member type or static data members. 
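// Where the pointee deduction now happens instead: BuildPointerType,
// BuildReferenceType and BuildBlockPointerType call the new
// deduceOpenCLPointeeAddrSpace helper, so for a sketch like
//
//   kernel void k(global int *p) { int **q; }
//
// the unqualified pointee of 'q' is deduced __generic in OpenCL 2.0 and
// C++ for OpenCL, and __private in OpenCL 1.2 and earlier, matching the
// helper's version check.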
- (D.getContext() == DeclaratorContext::MemberContext && - (!IsPointee && - D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_static)) || - // Do not deduce addr space of non-pointee in type alias because it - // doesn't define any object. - (D.getContext() == DeclaratorContext::AliasDeclContext && !IsPointee) || - // Do not deduce addr space for types used to define a typedef and the - // typedef itself, except the pointee type of a pointer type which is used - // to define the typedef. - (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef && - !IsPointee) || - // Do not deduce addr space of the void type, e.g. in f(void), otherwise - // it will fail some sema check. - (T->isVoidType() && !IsPointee) || - // Do not deduce addr spaces for dependent types because they might end - // up instantiating to a type with an explicit address space qualifier. - // Except for pointer or reference types because the addr space in - // template argument can only belong to a pointee. - (T->isDependentType() && !T->isPointerType() && !T->isReferenceType()) || - // Do not deduce addr space of decltype because it will be taken from - // its argument. - T->isDecltypeType() || - // OpenCL spec v2.0 s6.9.b: - // The sampler type cannot be used with the __local and __global address - // space qualifiers. - // OpenCL spec v2.0 s6.13.14: - // Samplers can also be declared as global constants in the program - // source using the following syntax. - // const sampler_t = - // In codegen, file-scope sampler type variable has special handing and - // does not rely on address space qualifier. On the other hand, deducing - // address space of const sampler file-scope variable as global address - // space causes spurious diagnostic about __global address space - // qualifier, therefore do not deduce address space of file-scope sampler - // type variable. - (D.getContext() == DeclaratorContext::FileContext && T->isSamplerT())) - return; - - LangAS ImpAddr = LangAS::Default; - // Put OpenCL automatic variable in private address space. - // OpenCL v1.2 s6.5: - // The default address space name for arguments to a function in a - // program, or local variables of a function is __private. All function - // arguments shall be in the __private address space. - if (State.getSema().getLangOpts().OpenCLVersion <= 120 && - !State.getSema().getLangOpts().OpenCLCPlusPlus) { - ImpAddr = LangAS::opencl_private; - } else { - // If address space is not set, OpenCL 2.0 defines non private default - // address spaces for some cases: - // OpenCL 2.0, section 6.5: - // The address space for a variable at program scope or a static variable - // inside a function can either be __global or __constant, but defaults to - // __global if not specified. - // (...) - // Pointers that are declared without pointing to a named address space - // point to the generic address space. - if (IsPointee) { - ImpAddr = LangAS::opencl_generic; - } else { - if (D.getContext() == DeclaratorContext::TemplateArgContext) { - // Do not deduce address space for non-pointee type in template arg. 
- } else if (D.getContext() == DeclaratorContext::FileContext) { - ImpAddr = LangAS::opencl_global; - } else { - if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static || - D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_extern) { - ImpAddr = LangAS::opencl_global; - } else { - ImpAddr = LangAS::opencl_private; - } - } - } - } - T = State.getSema().Context.getAddrSpaceQualType(T, ImpAddr); -} - static void HandleLifetimeBoundAttr(TypeProcessingState &State, QualType &CurType, ParsedAttr &Attr) { @@ -7667,7 +7562,7 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, // Otherwise, only consider type processing for a C++11 attribute if // it's actually been applied to a type. // We also allow C++11 address_space and - // opencl language address space attributes to pass through. + // OpenCL language address space attributes to pass through. continue; } } @@ -7847,8 +7742,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, if (!state.getSema().getLangOpts().OpenCL || type.getAddressSpace() != LangAS::Default) return; - - deduceOpenCLImplicitAddrSpace(state, type, TAL); } void Sema::completeExprArrayBound(Expr *E) { diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 3592461f8e91e..812d3a1283728 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -4583,14 +4583,6 @@ QualType TreeTransform::TransformDecayedType(TypeLocBuilder &TLB, return Result; } -/// Helper to deduce addr space of a pointee type in OpenCL mode. -/// If the type is updated it will be overwritten in PointeeType param. -inline void deduceOpenCLPointeeAddrSpace(Sema &SemaRef, QualType &PointeeType) { - if (PointeeType.getAddressSpace() == LangAS::Default) - PointeeType = SemaRef.Context.getAddrSpaceQualType(PointeeType, - LangAS::opencl_generic); -} - template QualType TreeTransform::TransformPointerType(TypeLocBuilder &TLB, PointerTypeLoc TL) { @@ -4599,9 +4591,6 @@ QualType TreeTransform::TransformPointerType(TypeLocBuilder &TLB, if (PointeeType.isNull()) return QualType(); - if (SemaRef.getLangOpts().OpenCL) - deduceOpenCLPointeeAddrSpace(SemaRef, PointeeType); - QualType Result = TL.getType(); if (PointeeType->getAs()) { // A dependent pointer type 'T *' has is being transformed such @@ -4640,9 +4629,6 @@ TreeTransform::TransformBlockPointerType(TypeLocBuilder &TLB, if (PointeeType.isNull()) return QualType(); - if (SemaRef.getLangOpts().OpenCL) - deduceOpenCLPointeeAddrSpace(SemaRef, PointeeType); - QualType Result = TL.getType(); if (getDerived().AlwaysRebuild() || PointeeType != TL.getPointeeLoc().getType()) { @@ -4672,9 +4658,6 @@ TreeTransform::TransformReferenceType(TypeLocBuilder &TLB, if (PointeeType.isNull()) return QualType(); - if (SemaRef.getLangOpts().OpenCL) - deduceOpenCLPointeeAddrSpace(SemaRef, PointeeType); - QualType Result = TL.getType(); if (getDerived().AlwaysRebuild() || PointeeType != T->getPointeeTypeAsWritten()) { diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 8991a39a70679..d989f46c4ab4b 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -424,6 +424,8 @@ namespace clang { template void mergeMergeable(Mergeable *D); + void mergeMergeable(LifetimeExtendedTemporaryDecl *D); + void mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl *Existing, DeclID DsID, bool IsKeyDecl); @@ -884,6 +886,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl 
*FD) { FD->ODRHash = Record.readInt(); FD->setHasODRHash(true); + FD->setUsesFPIntrin(Record.readInt()); switch ((FunctionDecl::TemplatedKind)Record.readInt()) { case FunctionDecl::TK_NonTemplate: @@ -2358,6 +2361,7 @@ void ASTDeclReader::VisitLifetimeExtendedTemporaryDecl( if (Record.readInt()) D->Value = new (D->getASTContext()) APValue(Record.readAPValue()); D->ManglingNumber = Record.readInt(); + mergeMergeable(D); } std::pair @@ -2555,6 +2559,25 @@ static bool allowODRLikeMergeInC(NamedDecl *ND) { return false; } +/// Attempts to merge LifetimeExtendedTemporaryDecl with +/// identical class definitions from two different modules. +void ASTDeclReader::mergeMergeable(LifetimeExtendedTemporaryDecl *D) { + // If modules are not available, there is no reason to perform this merge. + if (!Reader.getContext().getLangOpts().Modules) + return; + + LifetimeExtendedTemporaryDecl *LETDecl = D; + + LifetimeExtendedTemporaryDecl *&LookupResult = + Reader.LETemporaryForMerging[std::make_pair( + LETDecl->getExtendingDecl(), LETDecl->getManglingNumber())]; + if (LookupResult) + Reader.getContext().setPrimaryMergedDecl(LETDecl, + LookupResult->getCanonicalDecl()); + else + LookupResult = LETDecl; +} + /// Attempts to merge the given declaration (D) with another declaration /// of the same entity, for the case where the entity is not actually /// redeclarable. This happens, for instance, when merging the fields of diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 51902a607ca12..38eb64e52e4ac 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -559,6 +559,7 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { Record.AddSourceLocation(D->getEndLoc()); Record.push_back(D->getODRHash()); + Record.push_back(D->usesFPIntrin()); Record.push_back(D->getTemplatedKind()); switch (D->getTemplatedKind()) { diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index a824499518730..01c7afe520410 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -2525,19 +2525,18 @@ MallocChecker::LeakInfo MallocChecker::getAllocationSite(const ExplodedNode *N, // Find the most recent expression bound to the symbol in the current // context. - if (!ReferenceRegion) { - if (const MemRegion *MR = C.getLocationRegionIfPostStore(N)) { - SVal Val = State->getSVal(MR); - if (Val.getAsLocSymbol() == Sym) { - const VarRegion* VR = MR->getBaseRegion()->getAs(); - // Do not show local variables belonging to a function other than - // where the error is reported. - if (!VR || - (VR->getStackFrame() == LeakContext->getStackFrame())) - ReferenceRegion = MR; - } + if (!ReferenceRegion) { + if (const MemRegion *MR = C.getLocationRegionIfPostStore(N)) { + SVal Val = State->getSVal(MR); + if (Val.getAsLocSymbol() == Sym) { + const VarRegion *VR = MR->getBaseRegion()->getAs(); + // Do not show local variables belonging to a function other than + // where the error is reported. + if (!VR || (VR->getStackFrame() == LeakContext->getStackFrame())) + ReferenceRegion = MR; } } + } // Allocation node, is the last node in the current or parent context in // which the symbol was tracked. 
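
// The new ASTDeclReader::mergeMergeable(LifetimeExtendedTemporaryDecl *)
// overload above boils down to a first-entry-wins table: temporaries are
// keyed on (extending decl, mangling number), the first declaration seen for
// a key becomes canonical, and later duplicates loaded from other modules are
// redirected to it. The sketch below is a minimal standalone illustration of
// that merging pattern, compilable with any C++14 compiler; the stand-in
// types and names here are invented for the example and are not Clang APIs.

#include <cassert>
#include <map>
#include <utility>

struct TemporaryDecl {
  int ExtendingDeclID;                 // stand-in for the extending ValueDecl
  unsigned ManglingNumber;             // disambiguates temporaries per decl
  TemporaryDecl *MergedWith = nullptr; // set when merged into a canonical decl
};

// Stand-in for the reader's LETemporaryForMerging map.
static std::map<std::pair<int, unsigned>, TemporaryDecl *> TemporaryForMerging;

static void mergeTemporary(TemporaryDecl *D) {
  TemporaryDecl *&Slot =
      TemporaryForMerging[{D->ExtendingDeclID, D->ManglingNumber}];
  if (Slot)
    D->MergedWith = Slot; // duplicate from another module: merge into first
  else
    Slot = D;             // first sighting: this decl becomes canonical
}

int main() {
  TemporaryDecl A{1, 0}, B{1, 0}, C{1, 1};
  mergeTemporary(&A); // canonical for key (1, 0)
  mergeTemporary(&B); // same key as A, so merged into A
  mergeTemporary(&C); // different mangling number, so canonical
  assert(!A.MergedWith && B.MergedWith == &A && !C.MergedWith);
  return 0;
}
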
diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index 7a0dda563282d..47099f2afb6a4 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -64,6 +64,7 @@ class StreamChecker : public Checker Callbacks = { {{"fopen"}, &StreamChecker::evalFopen}, + {{"freopen", 3}, &StreamChecker::evalFreopen}, {{"tmpfile"}, &StreamChecker::evalFopen}, {{"fclose", 1}, &StreamChecker::evalFclose}, {{"fread", 4}, @@ -90,6 +91,7 @@ class StreamChecker : public Checker(Call.getOriginExpr()); + if (!CE) + return; + + Optional StreamVal = Call.getArgSVal(2).getAs(); + if (!StreamVal) + return; + // Do not allow NULL as the passed stream pointer. + // This is not specified in the man page but may crash on some systems. + checkNullStream(*StreamVal, C, State); + // Check if an error was generated. + if (C.isDifferent()) + return; + + SymbolRef StreamSym = StreamVal->getAsSymbol(); + // Do not care about special values for stream ("(FILE *)0x12345"?). + if (!StreamSym) + return; + + // Generate state for the non-failed case. + // The return value is the passed stream pointer. + // According to the documentation, the stream is closed first + // but any close error is ignored. The state changes to (or remains) opened. + ProgramStateRef StateRetNotNull = + State->BindExpr(CE, C.getLocationContext(), *StreamVal); + // Generate state for the NULL return value. + // The stream switches to the OpenFailed state. + ProgramStateRef StateRetNull = State->BindExpr(CE, C.getLocationContext(), + C.getSValBuilder().makeNull()); + + StateRetNotNull = + StateRetNotNull->set(StreamSym, StreamState::getOpened()); + StateRetNull = + StateRetNull->set(StreamSym, StreamState::getOpenFailed()); + + C.addTransition(StateRetNotNull); + C.addTransition(StateRetNull); +} + void StreamChecker::evalFclose(const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); if (checkDoubleClose(Call, C, State)) diff --git a/clang/lib/Tooling/CMakeLists.txt b/clang/lib/Tooling/CMakeLists.txt index 05061f0a10a80..59c990daaa29f 100644 --- a/clang/lib/Tooling/CMakeLists.txt +++ b/clang/lib/Tooling/CMakeLists.txt @@ -17,6 +17,7 @@ add_clang_library(clangTooling CommonOptionsParser.cpp CompilationDatabase.cpp Execution.cpp + ExpandResponseFilesCompilationDatabase.cpp FileMatchTrie.cpp FixIt.cpp GuessTargetAndModeCompilationDatabase.cpp diff --git a/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp b/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp new file mode 100644 index 0000000000000..84936ba05b20f --- /dev/null +++ b/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp @@ -0,0 +1,88 @@ +//===- ExpandResponseFilesCompilationDatabase.cpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/StringSaver.h" + +namespace clang { +namespace tooling { +namespace { + +class ExpandResponseFilesDatabase : public CompilationDatabase { +public: + ExpandResponseFilesDatabase( + std::unique_ptr Base, + llvm::cl::TokenizerCallback Tokenizer, + llvm::IntrusiveRefCntPtr FS) + : Base(std::move(Base)), Tokenizer(Tokenizer), FS(std::move(FS)) { + assert(this->Base != nullptr); + assert(this->Tokenizer != nullptr); + assert(this->FS != nullptr); + } + + std::vector getAllFiles() const override { + return Base->getAllFiles(); + } + + std::vector + getCompileCommands(StringRef FilePath) const override { + return expand(Base->getCompileCommands(FilePath)); + } + + std::vector getAllCompileCommands() const override { + return expand(Base->getAllCompileCommands()); + } + +private: + std::vector expand(std::vector Cmds) const { + for (auto &Cmd : Cmds) { + bool SeenRSPFile = false; + llvm::SmallVector Argv; + Argv.reserve(Cmd.CommandLine.size()); + for (auto &Arg : Cmd.CommandLine) { + Argv.push_back(Arg.c_str()); + SeenRSPFile |= Arg.front() == '@'; + } + if (!SeenRSPFile) + continue; + llvm::BumpPtrAllocator Alloc; + llvm::StringSaver Saver(Alloc); + llvm::cl::ExpandResponseFiles(Saver, Tokenizer, Argv, false, false, *FS, + llvm::StringRef(Cmd.Directory)); + Cmd.CommandLine.assign(Argv.begin(), Argv.end()); + } + return Cmds; + } + +private: + std::unique_ptr Base; + llvm::cl::TokenizerCallback Tokenizer; + llvm::IntrusiveRefCntPtr FS; +}; + +} // namespace + +std::unique_ptr +expandResponseFiles(std::unique_ptr Base, + llvm::IntrusiveRefCntPtr FS) { + auto Tokenizer = llvm::Triple(llvm::sys::getProcessTriple()).isOSWindows() + ? llvm::cl::TokenizeWindowsCommandLine + : llvm::cl::TokenizeGNUCommandLine; + return std::make_unique( + std::move(Base), Tokenizer, std::move(FS)); +} + +} // namespace tooling +} // namespace clang diff --git a/clang/lib/Tooling/JSONCompilationDatabase.cpp b/clang/lib/Tooling/JSONCompilationDatabase.cpp index f19a0f7550b96..04dd4dbf62484 100644 --- a/clang/lib/Tooling/JSONCompilationDatabase.cpp +++ b/clang/lib/Tooling/JSONCompilationDatabase.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" #include @@ -168,7 +169,8 @@ class JSONCompilationDatabasePlugin : public CompilationDatabasePlugin { auto Base = JSONCompilationDatabase::loadFromFile( JSONDatabasePath, ErrorMessage, JSONCommandLineSyntax::AutoDetect); return Base ? 
inferTargetAndDriverMode( - inferMissingCompileCommands(std::move(Base))) + inferMissingCompileCommands(expandResponseFiles( + std::move(Base), llvm::vfs::getRealFileSystem()))) : nullptr; } }; diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index dddc265c8c416..67081497d04c0 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/Basic/LLVM.h" @@ -56,6 +58,14 @@ class syntax::TreeBuilder { /// Range. void foldNode(llvm::ArrayRef Range, syntax::Tree *New); + /// Must be called with the range of each `DeclaratorDecl`. Ensures the + /// corresponding declarator nodes are covered by `SimpleDeclaration`. + void noticeDeclaratorRange(llvm::ArrayRef Range); + + /// Notifies that we should not consume trailing semicolon when computing + /// token range of \p D. + void noticeDeclaratorWithoutSemicolon(Decl *D); + /// Mark the \p Child node with a corresponding \p Role. All marked children /// should be consumed by foldNode. /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), @@ -66,7 +76,7 @@ class syntax::TreeBuilder { void markExprChild(Expr *Child, NodeRole Role); /// Set role for a token starting at \p Loc. - void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R); + void markChildToken(SourceLocation Loc, NodeRole R); /// Finish building the tree and consume the root node. syntax::TranslationUnit *finalize() && { @@ -94,7 +104,14 @@ class syntax::TreeBuilder { return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); } llvm::ArrayRef getRange(const Decl *D) const { - return getRange(D->getBeginLoc(), D->getEndLoc()); + auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc()); + if (llvm::isa(D)) + return Tokens; + if (DeclsWithoutSemicolons.count(D)) + return Tokens; + // FIXME: do not consume trailing semicolon on function definitions. + // Most declarations own a semicolon in syntax trees, but not in clang AST. + return withTrailingSemicolon(Tokens); } llvm::ArrayRef getExprRange(const Expr *E) const { return getRange(E->getBeginLoc(), E->getEndLoc()); @@ -108,14 +125,22 @@ class syntax::TreeBuilder { // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and // all statements that end with those. Consume this semicolon here. - // - // (!) statements never consume 'eof', so looking at the next token is ok. + if (Tokens.back().kind() == tok::semi) + return Tokens; + return withTrailingSemicolon(Tokens); + } + +private: + llvm::ArrayRef + withTrailingSemicolon(llvm::ArrayRef Tokens) const { + assert(!Tokens.empty()); + assert(Tokens.back().kind() != tok::eof); + // (!) we never consume 'eof', so looking at the next token is ok. if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); return Tokens; } -private: /// Finds a token starting at \p L. The token must exist. 
const syntax::Token *findToken(SourceLocation L) const; @@ -136,6 +161,8 @@ class syntax::TreeBuilder { {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); } + ~Forest() { assert(DelayedFolds.empty()); } + void assignRole(llvm::ArrayRef Range, syntax::NodeRole Role) { assert(!Range.empty()); @@ -148,30 +175,46 @@ class syntax::TreeBuilder { It->second.Role = Role; } - /// Add \p Node to the forest and fill its children nodes based on the \p - /// NodeRange. - void foldChildren(llvm::ArrayRef NodeTokens, + /// Add \p Node to the forest and attach child nodes based on \p Tokens. + void foldChildren(llvm::ArrayRef Tokens, syntax::Tree *Node) { - assert(!NodeTokens.empty()); - assert(Node->firstChild() == nullptr && "node already has children"); - - auto *FirstToken = NodeTokens.begin(); - auto BeginChildren = Trees.lower_bound(FirstToken); - assert(BeginChildren != Trees.end() && - BeginChildren->first == FirstToken && - "fold crosses boundaries of existing subtrees"); - auto EndChildren = Trees.lower_bound(NodeTokens.end()); - assert((EndChildren == Trees.end() || - EndChildren->first == NodeTokens.end()) && - "fold crosses boundaries of existing subtrees"); + // Execute delayed folds inside `Tokens`. + auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin()); + auto It = BeginExecuted; + for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It) + foldChildrenEager(llvm::makeArrayRef(It->first, It->second.End), + It->second.Node); + DelayedFolds.erase(BeginExecuted, It); + + // Attach children to `Node`. + foldChildrenEager(Tokens, Node); + } - // (!) we need to go in reverse order, because we can only prepend. - for (auto It = EndChildren; It != BeginChildren; --It) - Node->prependChildLowLevel(std::prev(It)->second.Node, - std::prev(It)->second.Role); + /// Schedule a call to `foldChildren` that will only be executed when + /// the containing node is folded. The range of delayed nodes can be extended + /// by calling `extendDelayedFold`. Only one delayed node for each starting + /// token is allowed. + void foldChildrenDelayed(llvm::ArrayRef Tokens, + syntax::Tree *Node) { + assert(!Tokens.empty()); + bool Inserted = + DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}}) + .second; + (void)Inserted; + assert(Inserted && "Multiple delayed folds start at the same token"); + } - Trees.erase(BeginChildren, EndChildren); - Trees.insert({FirstToken, NodeAndRole(Node)}); + /// If there is a delayed fold starting at `ExtendedRange.begin()`, extends + /// its endpoint to `ExtendedRange.end()` and returns true. + /// Otherwise, returns false. + bool extendDelayedFold(llvm::ArrayRef ExtendedRange) { + assert(!ExtendedRange.empty()); + auto It = DelayedFolds.find(ExtendedRange.data()); + if (It == DelayedFolds.end()) + return false; + assert(It->second.End <= ExtendedRange.end()); + It->second.End = ExtendedRange.end(); + return true; } // EXPECTS: all tokens were consumed and are owned by a single root node. @@ -199,6 +242,30 @@ class syntax::TreeBuilder { } private: + /// Implementation detail of `foldChildren`, does actual folding ignoring + /// delayed folds. 
+ void foldChildrenEager(llvm::ArrayRef Tokens, + syntax::Tree *Node) { + assert(Node->firstChild() == nullptr && "node already has children"); + + auto *FirstToken = Tokens.begin(); + auto BeginChildren = Trees.lower_bound(FirstToken); + assert((BeginChildren == Trees.end() || + BeginChildren->first == FirstToken) && + "fold crosses boundaries of existing subtrees"); + auto EndChildren = Trees.lower_bound(Tokens.end()); + assert( + (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && + "fold crosses boundaries of existing subtrees"); + + // (!) we need to go in reverse order, because we can only prepend. + for (auto It = EndChildren; It != BeginChildren; --It) + Node->prependChildLowLevel(std::prev(It)->second.Node, + std::prev(It)->second.Role); + + Trees.erase(BeginChildren, EndChildren); + Trees.insert({FirstToken, NodeAndRole(Node)}); + } /// A with a role that should be assigned to it when adding to a parent. struct NodeAndRole { explicit NodeAndRole(syntax::Node *Node) @@ -209,9 +276,18 @@ class syntax::TreeBuilder { }; /// Maps from the start token to a subtree starting at that token. + /// Keys in the map are pointers into the array of expanded tokens, so + /// pointer order corresponds to the order of preprocessor tokens. /// FIXME: storing the end tokens is redundant. /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. std::map Trees; + + /// See documentation of `foldChildrenDelayed` for details. + struct DelayedFold { + const syntax::Token *End = nullptr; + syntax::Tree *Node = nullptr; + }; + std::map DelayedFolds; }; /// For debugging purposes. @@ -219,6 +295,7 @@ class syntax::TreeBuilder { syntax::Arena &Arena; Forest Pending; + llvm::DenseSet DeclsWithoutSemicolons; }; namespace { @@ -229,20 +306,30 @@ class BuildTreeVisitor : public RecursiveASTVisitor { bool shouldTraversePostOrder() const { return true; } - bool TraverseDecl(Decl *D) { - if (!D || isa(D)) - return RecursiveASTVisitor::TraverseDecl(D); - if (!llvm::isa(D->getDeclContext())) - return true; // Only build top-level decls for now, do not recurse. - return RecursiveASTVisitor::TraverseDecl(D); + bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) { + // Ensure declarators are covered by SimpleDeclaration. + Builder.noticeDeclaratorRange(Builder.getRange(D)); + // FIXME: build nodes for the declarator too. + return true; + } + bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) { + // Also a declarator. + Builder.noticeDeclaratorRange(Builder.getRange(D)); + // FIXME: build nodes for the declarator too. + return true; } bool VisitDecl(Decl *D) { - assert(llvm::isa(D->getDeclContext()) && - "expected a top-level decl"); assert(!D->isImplicit()); Builder.foldNode(Builder.getRange(D), - new (allocator()) syntax::TopLevelDeclaration()); + new (allocator()) syntax::UnknownDeclaration()); + return true; + } + + bool WalkUpFromTagDecl(TagDecl *C) { + // Avoid building UnknownDeclaration here: syntactically 'struct X {}' and + // similar are part of declaration specifiers and do not introduce a new + // top-level declaration. 
return true; } @@ -255,11 +342,10 @@ class BuildTreeVisitor : public RecursiveASTVisitor { bool WalkUpFromCompoundStmt(CompoundStmt *S) { using NodeRole = syntax::NodeRole; - Builder.markChildToken(S->getLBracLoc(), tok::l_brace, NodeRole::OpenParen); + Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); for (auto *Child : S->body()) Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); - Builder.markChildToken(S->getRBracLoc(), tok::r_brace, - NodeRole::CloseParen); + Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::CompoundStatement); @@ -290,7 +376,11 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool TraverseStmt(Stmt *S) { - if (auto *E = llvm::dyn_cast_or_null(S)) { + if (auto *DS = llvm::dyn_cast_or_null(S)) { + // We want to consume the semicolon, make sure SimpleDeclaration does not. + for (auto *D : DS->decls()) + Builder.noticeDeclaratorWithoutSemicolon(D); + } else if (auto *E = llvm::dyn_cast_or_null(S)) { // (!) do not recurse into subexpressions. // we do not have syntax trees for expressions yet, so we only want to see // the first top-level expression. @@ -323,7 +413,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromSwitchStmt(SwitchStmt *S) { - Builder.markChildToken(S->getSwitchLoc(), tok::kw_switch, + Builder.markChildToken(S->getSwitchLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), @@ -332,7 +422,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromCaseStmt(CaseStmt *S) { - Builder.markChildToken(S->getKeywordLoc(), tok::kw_case, + Builder.markChildToken(S->getKeywordLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); @@ -342,7 +432,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromDefaultStmt(DefaultStmt *S) { - Builder.markChildToken(S->getKeywordLoc(), tok::kw_default, + Builder.markChildToken(S->getKeywordLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), @@ -351,11 +441,10 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromIfStmt(IfStmt *S) { - Builder.markChildToken(S->getIfLoc(), tok::kw_if, - syntax::NodeRole::IntroducerKeyword); + Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getThen(), syntax::NodeRole::IfStatement_thenStatement); - Builder.markChildToken(S->getElseLoc(), tok::kw_else, + Builder.markChildToken(S->getElseLoc(), syntax::NodeRole::IfStatement_elseKeyword); Builder.markStmtChild(S->getElse(), syntax::NodeRole::IfStatement_elseStatement); @@ -365,8 +454,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromForStmt(ForStmt *S) { - Builder.markChildToken(S->getForLoc(), tok::kw_for, - syntax::NodeRole::IntroducerKeyword); + Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::ForStatement); @@ -374,7 +462,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromWhileStmt(WhileStmt *S) { - Builder.markChildToken(S->getWhileLoc(), 
tok::kw_while, + Builder.markChildToken(S->getWhileLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), @@ -383,7 +471,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromContinueStmt(ContinueStmt *S) { - Builder.markChildToken(S->getContinueLoc(), tok::kw_continue, + Builder.markChildToken(S->getContinueLoc(), syntax::NodeRole::IntroducerKeyword); Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::ContinueStatement); @@ -391,7 +479,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromBreakStmt(BreakStmt *S) { - Builder.markChildToken(S->getBreakLoc(), tok::kw_break, + Builder.markChildToken(S->getBreakLoc(), syntax::NodeRole::IntroducerKeyword); Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::BreakStatement); @@ -399,7 +487,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromReturnStmt(ReturnStmt *S) { - Builder.markChildToken(S->getReturnLoc(), tok::kw_return, + Builder.markChildToken(S->getReturnLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markExprChild(S->getRetValue(), syntax::NodeRole::ReturnStatement_value); @@ -409,8 +497,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { - Builder.markChildToken(S->getForLoc(), tok::kw_for, - syntax::NodeRole::IntroducerKeyword); + Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::RangeBasedForStatement); @@ -431,8 +518,19 @@ void syntax::TreeBuilder::foldNode(llvm::ArrayRef Range, Pending.foldChildren(Range, New); } -void syntax::TreeBuilder::markChildToken(SourceLocation Loc, - tok::TokenKind Kind, NodeRole Role) { +void syntax::TreeBuilder::noticeDeclaratorRange( + llvm::ArrayRef Range) { + if (Pending.extendDelayedFold(Range)) + return; + Pending.foldChildrenDelayed(Range, + new (allocator()) syntax::SimpleDeclaration); +} + +void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) { + DeclsWithoutSemicolons.insert(D); +} + +void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { if (Loc.isInvalid()) return; Pending.assignRole(*findToken(Loc), Role); diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index 776330ab585fc..b2ed4ffa22c2b 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -16,8 +16,6 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "Leaf"; case NodeKind::TranslationUnit: return OS << "TranslationUnit"; - case NodeKind::TopLevelDeclaration: - return OS << "TopLevelDeclaration"; case NodeKind::UnknownExpression: return OS << "UnknownExpression"; case NodeKind::UnknownStatement: @@ -50,6 +48,10 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "ExpressionStatement"; case NodeKind::CompoundStatement: return OS << "CompoundStatement"; + case NodeKind::UnknownDeclaration: + return OS << "UnknownDeclaration"; + case NodeKind::SimpleDeclaration: + return OS << "SimpleDeclaration"; } llvm_unreachable("unknown node kind"); } diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index a2c3bc137d6ba..5941507e086d2 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ 
b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -119,6 +119,22 @@ llvm::StringRef FileRange::text(const SourceManager &SM) const { return Text.substr(Begin, length()); } +llvm::ArrayRef TokenBuffer::expandedTokens(SourceRange R) const { + if (R.isInvalid()) + return {}; + const Token *Begin = + llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { + return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin()); + }); + const Token *End = + llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { + return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location()); + }); + if (Begin > End) + return {}; + return {Begin, End}; +} + std::pair TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { assert(Expanded); diff --git a/clang/test/AST/Interp/cond.cpp b/clang/test/AST/Interp/cond.cpp index 8a5a318c216d9..1fc69ed333e15 100644 --- a/clang/test/AST/Interp/cond.cpp +++ b/clang/test/AST/Interp/cond.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++17 -fsyntax-only -fforce-experimental-new-constant-interpreter %s -verify +// RUN: %clang_cc1 -std=c++17 -fsyntax-only -fexperimental-new-constant-interpreter %s -verify // RUN: %clang_cc1 -std=c++17 -fsyntax-only %s -verify // expected-no-diagnostics diff --git a/clang/test/AST/ast-dump-file-line-json.c b/clang/test/AST/ast-dump-file-line-json.c new file mode 100644 index 0000000000000..89807cb8274ed --- /dev/null +++ b/clang/test/AST/ast-dump-file-line-json.c @@ -0,0 +1,309 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -ast-dump=json %s | FileCheck %s + +#line 4 "test.c" +int a; + +#line 32 "bar.h" +int b; +int c; + +#line 11 "test.c" +int d; +int e; +// NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + + +// CHECK: "kind": "TranslationUnitDecl", +// CHECK-NEXT: "loc": {}, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": {}, +// CHECK-NEXT: "end": {} +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "TypedefDecl", +// CHECK-NEXT: "loc": {}, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": {}, +// CHECK-NEXT: "end": {} +// CHECK-NEXT: }, +// CHECK-NEXT: "isImplicit": true, +// CHECK-NEXT: "name": "__int128_t", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "__int128" +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "BuiltinType", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "__int128" +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "TypedefDecl", +// CHECK-NEXT: "loc": {}, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": {}, +// CHECK-NEXT: "end": {} +// CHECK-NEXT: }, +// CHECK-NEXT: "isImplicit": true, +// CHECK-NEXT: "name": "__uint128_t", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "unsigned __int128" +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "BuiltinType", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "unsigned __int128" +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "TypedefDecl", +// CHECK-NEXT: "loc": {}, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": {}, +// CHECK-NEXT: "end": {} +// CHECK-NEXT: }, +// CHECK-NEXT: "isImplicit": true, +// CHECK-NEXT: "name": "__NSConstantString", +// 
CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "struct __NSConstantString_tag" +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "RecordType", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "struct __NSConstantString_tag" +// CHECK-NEXT: }, +// CHECK-NEXT: "decl": { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "RecordDecl", +// CHECK-NEXT: "name": "__NSConstantString_tag" +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "TypedefDecl", +// CHECK-NEXT: "loc": {}, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": {}, +// CHECK-NEXT: "end": {} +// CHECK-NEXT: }, +// CHECK-NEXT: "isImplicit": true, +// CHECK-NEXT: "name": "__builtin_ms_va_list", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "char *" +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "PointerType", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "char *" +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "BuiltinType", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "char" +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "TypedefDecl", +// CHECK-NEXT: "loc": {}, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": {}, +// CHECK-NEXT: "end": {} +// CHECK-NEXT: }, +// CHECK-NEXT: "isImplicit": true, +// CHECK-NEXT: "name": "__builtin_va_list", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "struct __va_list_tag [1]" +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "ConstantArrayType", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "struct __va_list_tag [1]" +// CHECK-NEXT: }, +// CHECK-NEXT: "size": 1, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "RecordType", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "struct __va_list_tag" +// CHECK-NEXT: }, +// CHECK-NEXT: "decl": { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "RecordDecl", +// CHECK-NEXT: "name": "__va_list_tag" +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "VarDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 105, +// CHECK-NEXT: "file": "{{.*}}", +// CHECK-NEXT: "line": 4, +// CHECK-NEXT: "presumedFile": "test.c", +// CHECK-NEXT: "col": 5, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 101, +// CHECK-NEXT: "col": 1, +// CHECK-NEXT: "tokLen": 3 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 105, +// CHECK-NEXT: "col": 5, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "name": "a", +// CHECK-NEXT: "mangledName": "a", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "int" +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "VarDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 130, +// CHECK-NEXT: "line": 7, +// CHECK-NEXT: "presumedFile": "bar.h", +// CHECK-NEXT: 
"presumedLine": 32, +// CHECK-NEXT: "col": 5, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 126, +// CHECK-NEXT: "col": 1, +// CHECK-NEXT: "tokLen": 3 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 130, +// CHECK-NEXT: "col": 5, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "name": "b", +// CHECK-NEXT: "mangledName": "b", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "int" +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "VarDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 137, +// CHECK-NEXT: "line": 8, +// CHECK-NEXT: "presumedLine": 33, +// CHECK-NEXT: "col": 5, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 133, +// CHECK-NEXT: "col": 1, +// CHECK-NEXT: "tokLen": 3 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 137, +// CHECK-NEXT: "col": 5, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "name": "c", +// CHECK-NEXT: "mangledName": "c", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "int" +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "VarDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 163, +// CHECK-NEXT: "line": 11, +// CHECK-NEXT: "presumedFile": "test.c", +// CHECK-NEXT: "col": 5, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 159, +// CHECK-NEXT: "col": 1, +// CHECK-NEXT: "tokLen": 3 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 163, +// CHECK-NEXT: "col": 5, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "name": "d", +// CHECK-NEXT: "mangledName": "d", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "int" +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "VarDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 170, +// CHECK-NEXT: "line": 12, +// CHECK-NEXT: "col": 5, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 166, +// CHECK-NEXT: "col": 1, +// CHECK-NEXT: "tokLen": 3 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 170, +// CHECK-NEXT: "col": 5, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "name": "e", +// CHECK-NEXT: "mangledName": "e", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "int" +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } diff --git a/clang/test/AST/gen_ast_dump_json_test.py b/clang/test/AST/gen_ast_dump_json_test.py index 3a4064699657d..f783c79faef86 100644 --- a/clang/test/AST/gen_ast_dump_json_test.py +++ b/clang/test/AST/gen_ast_dump_json_test.py @@ -180,7 +180,7 @@ def process_file(source_file, clang_binary, cmdline_filters, cmdline_opts, filter_json(j, filters, out_asts) - with tempfile.NamedTemporaryFile("w") as f: + with tempfile.NamedTemporaryFile("wb", delete=False) as f: with open(source_file, "r") as srcf: for line in srcf.readlines(): # copy up to the note: @@ -201,6 +201,7 @@ def process_file(source_file, clang_binary, cmdline_filters, cmdline_opts, f.write(out_str) f.flush() + f.close() if do_update: print("Updating json appended source file 
to %s." % source_file) copyfile(f.name, source_file) @@ -209,6 +210,7 @@ def process_file(source_file, clang_binary, cmdline_filters, cmdline_opts, dest_path = '%s-json%s%s' % (partition[0], partition[1], partition[2]) print("Writing json appended source file to %s." % dest_path) copyfile(f.name, dest_path) + os.remove(f.name) return 0 diff --git a/clang/test/AST/language_address_space_attribute.cpp b/clang/test/AST/language_address_space_attribute.cpp index 8e098d6ce228f..7c6bdca06c06a 100644 --- a/clang/test/AST/language_address_space_attribute.cpp +++ b/clang/test/AST/language_address_space_attribute.cpp @@ -5,32 +5,32 @@ void langas() { // CHECK: VarDecl {{.*}} x_global '__global int *' - __attribute__((ocl_global)) int *x_global; + __attribute__((opencl_global)) int *x_global; // CHECK: VarDecl {{.*}} z_global '__global int *' - [[clang::ocl_global]] int *z_global; + [[clang::opencl_global]] int *z_global; // CHECK: VarDecl {{.*}} x_local '__local int *' - __attribute__((ocl_local)) int *x_local; + __attribute__((opencl_local)) int *x_local; // CHECK: VarDecl {{.*}} z_local '__local int *' - [[clang::ocl_local]] int *z_local; + [[clang::opencl_local]] int *z_local; // CHECK: VarDecl {{.*}} x_constant '__constant int *' - __attribute__((ocl_constant)) int *x_constant; + __attribute__((opencl_constant)) int *x_constant; // CHECK: VarDecl {{.*}} z_constant '__constant int *' - [[clang::ocl_constant]] int *z_constant; + [[clang::opencl_constant]] int *z_constant; // CHECK: VarDecl {{.*}} x_private 'int *' - __attribute__((ocl_private)) int *x_private; + __attribute__((opencl_private)) int *x_private; // CHECK: VarDecl {{.*}} z_private 'int *' - [[clang::ocl_private]] int *z_private; + [[clang::opencl_private]] int *z_private; // CHECK: VarDecl {{.*}} x_generic '__generic int *' - __attribute__((ocl_generic)) int *x_generic; + __attribute__((opencl_generic)) int *x_generic; // CHECK: VarDecl {{.*}} z_generic '__generic int *' - [[clang::ocl_generic]] int *z_generic; + [[clang::opencl_generic]] int *z_generic; } diff --git a/clang/test/Analysis/properties.m b/clang/test/Analysis/properties.m index 2f427f2751820..d83b8ed14f93c 100644 --- a/clang/test/Analysis/properties.m +++ b/clang/test/Analysis/properties.m @@ -1049,6 +1049,8 @@ - (NSObject *)getShadowedIvar; - (void)clearShadowedIvar; - (NSObject *)getShadowedProp; - (void)clearShadowedProp; + +@property (assign) NSObject *o2; @end @implementation Shadowed @@ -1078,7 +1080,7 @@ @implementation Shadowing @synthesize o; -(void)testPropertyShadowing { - NSObject *oo = self.o; + NSObject *oo = self.o; // no-crash clang_analyzer_eval(self.o == oo); // expected-warning{{TRUE}} clang_analyzer_eval([self getShadowedIvar] == oo); // expected-warning{{UNKNOWN}} [self clearShadowedIvar]; @@ -1086,4 +1088,10 @@ -(void)testPropertyShadowing { clang_analyzer_eval([self getShadowedIvar] == oo); // expected-warning{{UNKNOWN}} clang_analyzer_eval([self getShadowedIvar] == nil); // expected-warning{{TRUE}} } + +@synthesize o2 = ooo2; + +-(void)testPropertyShadowingWithExplicitIvar { + NSObject *oo2 = self.o2; // no-crash +} @end diff --git a/clang/test/Analysis/stream.c b/clang/test/Analysis/stream.c index 61a97493f5351..e1db6780d90a2 100644 --- a/clang/test/Analysis/stream.c +++ b/clang/test/Analysis/stream.c @@ -20,6 +20,7 @@ extern void clearerr(FILE *stream); extern int feof(FILE *stream); extern int ferror(FILE *stream); extern int fileno(FILE *stream); +extern FILE *freopen(const char *pathname, const char *mode, FILE *stream); void check_fread() { 
FILE *fp = tmpfile(); @@ -111,6 +112,13 @@ void f_double_close(void) { fclose(p); // expected-warning {{Try to close a file Descriptor already closed. Cause undefined behaviour}} } +void f_double_close_alias(void) { + FILE *p1 = fopen("foo", "r"); + FILE *p2 = p1; + fclose(p1); + fclose(p2); // expected-warning {{Try to close a file Descriptor already closed. Cause undefined behaviour}} +} + void f_leak(int c) { FILE *p = fopen("foo.c", "r"); if(c) @@ -134,3 +142,37 @@ void pr7831(FILE *fp) { void pr8081(FILE *stream, long offset, int whence) { fseek(stream, offset, whence); } + +void check_freopen_1() { + FILE *f1 = freopen("foo.c", "r", (FILE *)0); // expected-warning {{Stream pointer might be NULL}} + f1 = freopen(0, "w", (FILE *)0x123456); // Do not report this as error. +} + +void check_freopen_2() { + FILE *f1 = fopen("foo.c", "r"); + if (f1) { + FILE *f2 = freopen(0, "w", f1); + if (f2) { + // Check if f1 and f2 point to the same stream. + fclose(f1); + fclose(f2); // expected-warning {{Try to close a file Descriptor already closed. Cause undefined behaviour}} + } else { + // Reopen failed. + // f1 points now to a possibly invalid stream but this condition is currently not checked. + // f2 is NULL. + rewind(f1); + rewind(f2); // expected-warning {{Stream pointer might be NULL}} + } + } +} + +void check_freopen_3() { + FILE *f1 = fopen("foo.c", "r"); + if (f1) { + // Unchecked result of freopen. + // The f1 may be invalid after this call (not checked by the checker). + freopen(0, "w", f1); + rewind(f1); + fclose(f1); + } +} diff --git a/clang/test/ClangScanDeps/Inputs/has_include_if_elif.json b/clang/test/ClangScanDeps/Inputs/has_include_if_elif.json new file mode 100644 index 0000000000000..36ca006b03297 --- /dev/null +++ b/clang/test/ClangScanDeps/Inputs/has_include_if_elif.json @@ -0,0 +1,7 @@ +[ +{ + "directory": "DIR", + "command": "clang -E DIR/has_include_if_elif2.cpp -IInputs", + "file": "DIR/has_include_if_elif2.cpp" +} +] diff --git a/clang/test/ClangScanDeps/has_include_if_elif.cpp b/clang/test/ClangScanDeps/has_include_if_elif.cpp new file mode 100644 index 0000000000000..dd56ecac69dbd --- /dev/null +++ b/clang/test/ClangScanDeps/has_include_if_elif.cpp @@ -0,0 +1,38 @@ +// RUN: rm -rf %t.dir +// RUN: rm -rf %t.cdb +// RUN: mkdir -p %t.dir +// RUN: cp %s %t.dir/has_include_if_elif2.cpp +// RUN: mkdir %t.dir/Inputs +// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h +// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header2.h +// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header3.h +// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header4.h +// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/has_include_if_elif.json > %t.cdb +// +// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: FileCheck %s +// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess | \ +// RUN: FileCheck %s + +#if __has_include("header.h") +#endif + +#if 0 +#elif __has_include("header2.h") +#endif + +#define H3 __has_include("header3.h") +#if H3 +#endif + +#define H4 __has_include("header4.h") + +#if 0 +#elif H4 +#endif + +// CHECK: has_include_if_elif2.cpp +// CHECK-NEXT: Inputs{{/|\\}}header.h +// CHECK-NEXT: Inputs{{/|\\}}header2.h +// CHECK-NEXT: Inputs{{/|\\}}header3.h +// CHECK-NEXT: Inputs{{/|\\}}header4.h diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c index b29d877dd8eca..7744b4f4a159d 100644 --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ 
b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -17756,8 +17756,6 @@ float32_t test_vminnmv_f32(float32x2_t a) { } // CHECK-LABEL: @test_vpaddq_s64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VPADDQ_V2_I]] @@ -17766,8 +17764,6 @@ int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vpaddq_u64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VPADDQ_V2_I]] diff --git a/clang/test/CodeGen/aarch64-neon-vcadd.c b/clang/test/CodeGen/aarch64-neon-vcadd.c new file mode 100644 index 0000000000000..2d721f187fe62 --- /dev/null +++ b/clang/test/CodeGen/aarch64-neon-vcadd.c @@ -0,0 +1,65 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ +// RUN: -target-feature +v8.3a -target-feature +fullfp16 -S -emit-llvm -o - %s \ +// RUN: | FileCheck %s + +#include + +void foo16x4_rot90(float16x4_t a, float16x4_t b) +{ +// CHECK: call <4 x half> @llvm.aarch64.neon.vcadd.rot90.v4f16 + float16x4_t result = vcadd_rot90_f16(a, b); +} + +void foo32x2_rot90(float32x2_t a, float32x2_t b) +{ +// CHECK: call <2 x float> @llvm.aarch64.neon.vcadd.rot90.v2f32 + float32x2_t result = vcadd_rot90_f32(a, b); +} + +void foo16x8_rot90(float16x8_t a, float16x8_t b) +{ +// CHECK: call <8 x half> @llvm.aarch64.neon.vcadd.rot90.v8f16 + float16x8_t result = vcaddq_rot90_f16(a, b); +} + +void foo32x4_rot90(float32x4_t a, float32x4_t b) +{ +// CHECK: call <4 x float> @llvm.aarch64.neon.vcadd.rot90.v4f32 + float32x4_t result = vcaddq_rot90_f32(a, b); +} + +void foo64x2_rot90(float64x2_t a, float64x2_t b) +{ +// CHECK: call <2 x double> @llvm.aarch64.neon.vcadd.rot90.v2f64 + float64x2_t result = vcaddq_rot90_f64(a, b); +} + +void foo16x4_rot270(float16x4_t a, float16x4_t b) +{ +// CHECK: call <4 x half> @llvm.aarch64.neon.vcadd.rot270.v4f16 + float16x4_t result = vcadd_rot270_f16(a, b); +} + +void foo32x2_rot270(float32x2_t a, float32x2_t b) +{ +// CHECK: call <2 x float> @llvm.aarch64.neon.vcadd.rot270.v2f32 + float32x2_t result = vcadd_rot270_f32(a, b); +} + +void foo16x8_rot270(float16x8_t a, float16x8_t b) +{ +// CHECK: call <8 x half> @llvm.aarch64.neon.vcadd.rot270.v8f16 + float16x8_t result = vcaddq_rot270_f16(a, b); +} + +void foo32x4_rot270(float32x4_t a, float32x4_t b) +{ +// CHECK: call <4 x float> @llvm.aarch64.neon.vcadd.rot270.v4f32 + float32x4_t result = vcaddq_rot270_f32(a, b); +} + +void foo64x2_rot270(float64x2_t a, float64x2_t b) +{ +// CHECK: call <2 x double> @llvm.aarch64.neon.vcadd.rot270.v2f64 + float64x2_t result = vcaddq_rot270_f64(a, b); +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/predicates.c b/clang/test/CodeGen/arm-mve-intrinsics/predicates.c new file mode 100644 index 0000000000000..5761849d094be --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/predicates.c @@ -0,0 +1,290 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns 
-O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s + +#include + +// CHECK-LABEL: @test_vctp16q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] +// +mve_pred16_t test_vctp16q(uint32_t a) +{ + return vctp16q(a); +} + +// CHECK-LABEL: @test_vctp16q_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] +// +mve_pred16_t test_vctp16q_m(uint32_t a, mve_pred16_t p) +{ + return vctp16q_m(a, p); +} + +// CHECK-LABEL: @test_vctp32q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] +// +mve_pred16_t test_vctp32q(uint32_t a) +{ + return vctp32q(a); +} + +// CHECK-LABEL: @test_vctp32q_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] +// +mve_pred16_t test_vctp32q_m(uint32_t a, mve_pred16_t p) +{ + return vctp32q_m(a, p); +} + +// CHECK-LABEL: @test_vctp64q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] +// +mve_pred16_t test_vctp64q(uint32_t a) +{ + return vctp64q(a); +} + +// CHECK-LABEL: @test_vctp64q_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] +// +mve_pred16_t test_vctp64q_m(uint32_t a, mve_pred16_t p) +{ + return vctp64q_m(a, p); +} + +// CHECK-LABEL: @test_vctp8q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: ret i16 [[TMP2]] +// +mve_pred16_t test_vctp8q(uint32_t a) +{ + return vctp8q(a); +} + +// CHECK-LABEL: @test_vctp8q_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +// CHECK-NEXT: ret i16 [[TMP5]] +// +mve_pred16_t test_vctp8q_m(uint32_t a, mve_pred16_t p) +{ + return vctp8q_m(a, p); +} + +// CHECK-LABEL: @test_vpnot( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = xor i16 [[A:%.*]], -1 +// CHECK-NEXT: ret i16 [[TMP0]] +// +mve_pred16_t test_vpnot(mve_pred16_t a) +{ + return vpnot(a); +} + +// CHECK-LABEL: @test_vpselq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[A:%.*]], <8 x half> [[B:%.*]] +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vpselq_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_f16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vpselq_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_f32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]] +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vpselq_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_s16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vpselq_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_s32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = 
bitcast <2 x i64> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[TMP5]] +// +int64x2_t test_vpselq_s64(int64x2_t a, int64x2_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_s64(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]] +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vpselq_s8(int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_s8(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]] +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vpselq_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_u16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vpselq_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_u32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[TMP5]] +// +uint64x2_t test_vpselq_u64(uint64x2_t a, uint64x2_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_u64(a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vpselq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]] +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vpselq_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vpselq(a, b, p); +#else /* POLYMORPHIC */ + return vpselq_u8(a, b, p); +#endif /* POLYMORPHIC */ +} + diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vabdq.c b/clang/test/CodeGen/arm-mve-intrinsics/vabdq.c new file mode 100644 index 
0000000000000..a416bfb773e6b --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vabdq.c @@ -0,0 +1,95 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vabdq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vabd.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) +{ +#ifdef POLYMORPHIC + return vabdq(a, b); +#else /* POLYMORPHIC */ + return vabdq_s8(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabdq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vabdq(a, b); +#else /* POLYMORPHIC */ + return vabdq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabdq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vabd.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vabdq_f32(float16x8_t a, float16x8_t b) +{ +#ifdef POLYMORPHIC + return vabdq(a, b); +#else /* POLYMORPHIC */ + return vabdq_f16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabdq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vabdq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabdq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vabdq_m_u16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabdq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vabdq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabdq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vabdq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabdq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abd.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> 
[[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vabdq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabdq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vabdq_m_f32(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vandq.c b/clang/test/CodeGen/arm-mve-intrinsics/vandq.c new file mode 100644 index 0000000000000..aeab8b7063ece --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vandq.c @@ -0,0 +1,72 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vandq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vandq(a, b); +#else /* POLYMORPHIC */ + return vandq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vandq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float> +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float32x4_t test_vandq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return vandq(a, b); +#else /* POLYMORPHIC */ + return vandq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vandq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vandq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vandq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vandq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vandq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[INACTIVE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i1> [[TMP3]], <8 x i16> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <8 x half> +// CHECK-NEXT: ret <8 x half> [[TMP6]] +// +float16x8_t test_vandq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vandq_m(inactive, a, b, p); 
+#else /* POLYMORPHIC */
+ return vandq_m_f16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c b/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c
new file mode 100644
index 0000000000000..3106b40a322d1
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c
@@ -0,0 +1,74 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vbicq_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], [[TMP0]]
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+ return vbicq(a, b);
+#else /* POLYMORPHIC */
+ return vbicq_u32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP0]], [[TMP2]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float32x4_t test_vbicq_f32(float32x4_t a, float32x4_t b)
+{
+#ifdef POLYMORPHIC
+ return vbicq(a, b);
+#else /* POLYMORPHIC */
+ return vbicq_f32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.bic.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vbicq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vbicq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vbicq_m_s8(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[INACTIVE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.arm.mve.bic.predicated.v8i16.v8i1(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i1> [[TMP3]], <8 x i16> [[TMP4]])
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP6]]
+//
+float16x8_t test_vbicq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vbicq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vbicq_m_f16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
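
The vbicq tests above depend only on BIC being plain bit math: vbicq(a, b) computes a AND NOT b, so the unpredicated forms need no target intrinsic and lower to an all-ones xor feeding an and, while only the predicated forms call @llvm.arm.mve.bic.predicated. A minimal scalar sketch of what one lane computes (the helper name is ours, purely illustrative, and not part of the test suite):

```c
#include <stdint.h>

/* One lane of vbicq_u32: bit-clear is AND with the complement.
 * Clang emits this as xor(b, -1) followed by and(a, ...), which is
 * the instruction pair the autogenerated CHECK lines above match. */
static inline uint32_t bic_lane_u32(uint32_t a, uint32_t b)
{
    return a & ~b;
}
```

diff --git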
a/clang/test/CodeGen/arm-mve-intrinsics/veorq.c b/clang/test/CodeGen/arm-mve-intrinsics/veorq.c new file mode 100644 index 0000000000000..c271568f791f3 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/veorq.c @@ -0,0 +1,72 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_veorq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return veorq(a, b); +#else /* POLYMORPHIC */ + return veorq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_veorq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float> +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float32x4_t test_veorq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return veorq(a, b); +#else /* POLYMORPHIC */ + return veorq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_veorq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.eor.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_veorq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return veorq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return veorq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_veorq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[INACTIVE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.arm.mve.eor.predicated.v8i16.v8i1(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i1> [[TMP3]], <8 x i16> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <8 x half> +// CHECK-NEXT: ret <8 x half> [[TMP6]] +// +float16x8_t test_veorq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return veorq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return veorq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c new file mode 100644 index 0000000000000..63300466c819e --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c 
@@ -0,0 +1,65 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vmaxnmq_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b)
+{
+#ifdef POLYMORPHIC
+ return vmaxnmq(a, b);
+#else /* POLYMORPHIC */
+ return vmaxnmq_f16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxnmq_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b)
+{
+#ifdef POLYMORPHIC
+ return vmaxnmq(a, b);
+#else /* POLYMORPHIC */
+ return vmaxnmq_f32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxnmq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmaxnmq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vmaxnmq_m_f16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxnmq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmaxnmq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vmaxnmq_m_f32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
new file mode 100644
index 0000000000000..133e28d6cf047
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
@@ -0,0 +1,98 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
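Unlike the float vmaxnmq above, integer vmaxq has no unpredicated IR intrinsic: the tests below expect each lane to become an icmp feeding a select, with the signedness of the compare (slt versus ult) being the only difference between the _s and _u variants. A minimal scalar sketch of one lane (the helper name is ours, not part of the test):

```c
#include <stdint.h>

/* One lane of vmaxq_s8: a signed compare-and-select. The vector CHECK
 * lines below match the icmp slt + select pair this expression lowers to. */
static inline int8_t max_lane_s8(int8_t a, int8_t b)
{
    return a < b ? b : a;
}
```
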
+// CHECK-LABEL: @test_vmaxq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[B]], <16 x i8> [[A]] +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) +{ +#ifdef POLYMORPHIC + return vmaxq(a, b); +#else /* POLYMORPHIC */ + return vmaxq_s8(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp ult <8 x i16> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]] +// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// +uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) +{ +#ifdef POLYMORPHIC + return vmaxq(a, b); +#else /* POLYMORPHIC */ + return vmaxq_u16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]] +// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// +int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) +{ +#ifdef POLYMORPHIC + return vmaxq(a, b); +#else /* POLYMORPHIC */ + return vmaxq_s32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxq_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vmaxq_m_u8(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmaxq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmaxq_m_u8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmaxq_m_s16(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmaxq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmaxq_m_s16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxq_m_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmaxq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmaxq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmaxq_m_u32(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c new file mode 100644 index 0000000000000..9ed5bf0c859be --- /dev/null +++ 
b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c @@ -0,0 +1,65 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vminnmq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) +{ +#ifdef POLYMORPHIC + return vminnmq(a, b); +#else /* POLYMORPHIC */ + return vminnmq_f16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminnmq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return vminnmq(a, b); +#else /* POLYMORPHIC */ + return vminnmq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminnmq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vminnmq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vminnmq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminnmq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vminnmq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vminnmq_m_f32(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c new file mode 100644 index 0000000000000..9e54eaeb5d839 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s 
| opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vminq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[B]], <16 x i8> [[A]] +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) +{ +#ifdef POLYMORPHIC + return vminq(a, b); +#else /* POLYMORPHIC */ + return vminq_u8(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]] +// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// +int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) +{ +#ifdef POLYMORPHIC + return vminq(a, b); +#else /* POLYMORPHIC */ + return vminq_s16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]] +// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// +uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vminq(a, b); +#else /* POLYMORPHIC */ + return vminq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vminq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vminq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vminq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vminq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vminq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vminq_m_u16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vminq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vminq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vminq_m_s32(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmulhq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmulhq.c new file mode 100644 index 
0000000000000..63696d698c503 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmulhq.c @@ -0,0 +1,95 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vmulhq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vmulh.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +uint8x16_t test_vmulhq_u8(uint8x16_t a, uint8x16_t b) +{ +#ifdef POLYMORPHIC + return vmulhq(a, b); +#else /* POLYMORPHIC */ + return vmulhq_u8(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulhq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vmulh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vmulhq_s16(int16x8_t a, int16x8_t b) +{ +#ifdef POLYMORPHIC + return vmulhq(a, b); +#else /* POLYMORPHIC */ + return vmulhq_s16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulhq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vmulh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vmulhq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vmulhq(a, b); +#else /* POLYMORPHIC */ + return vmulhq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulhq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mulh.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vmulhq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmulhq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmulhq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulhq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mulh.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmulhq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmulhq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmulhq_m_u16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulhq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mulh.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> 
[[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmulhq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmulhq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmulhq_m_s32(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c new file mode 100644 index 0000000000000..ac457cba81ebc --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c @@ -0,0 +1,125 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vmulq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) +{ +#ifdef POLYMORPHIC + return vmulq(a, b); +#else /* POLYMORPHIC */ + return vmulq_u8(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) +{ +#ifdef POLYMORPHIC + return vmulq(a, b); +#else /* POLYMORPHIC */ + return vmulq_s16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vmulq(a, b); +#else /* POLYMORPHIC */ + return vmulq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return vmulq(a, b); +#else /* POLYMORPHIC */ + return vmulq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vmulq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmulq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmulq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x 
i16> [[TMP2]]
+//
+uint16x8_t test_vmulq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmulq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vmulq_m_u16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmulq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmulq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmulq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vmulq_m_s32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmulq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vmulq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmulq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vmulq_m_f16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vornq.c b/clang/test/CodeGen/arm-mve-intrinsics/vornq.c
new file mode 100644
index 0000000000000..753a6ddf2ee17
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vornq.c
@@ -0,0 +1,74 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vornq_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[A:%.*]], [[TMP0]]
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+ return vornq(a, b);
+#else /* POLYMORPHIC */
+ return vornq_u32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vornq_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP0]], [[TMP2]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float32x4_t test_vornq_f32(float32x4_t a, float32x4_t b)
+{
+#ifdef POLYMORPHIC
+ return vornq(a, b);
+#else /* POLYMORPHIC */
+ return vornq_f32(a, b);
+#endif /* POLYMORPHIC */
+}
+
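vornq (OR-complement) mirrors vbicq: vornq(a, b) computes a OR NOT b, which is why the unpredicated forms above lower to an all-ones xor feeding an or, and only the predicated form below calls @llvm.arm.mve.orn.predicated. A minimal scalar sketch of one lane (the helper name is ours, illustrative only):

```c
#include <stdint.h>

/* One lane of vornq_u32: OR with the complement of the second operand,
 * matching the xor(b, -1) + or(a, ...) pattern in the CHECK lines above. */
static inline uint32_t orn_lane_u32(uint32_t a, uint32_t b)
{
    return a | ~b;
}
```

+// CHECK-LABEL: @test_vornq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: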
[[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.orn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vornq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vornq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vornq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vornq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[INACTIVE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.arm.mve.orn.predicated.v8i16.v8i1(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i1> [[TMP3]], <8 x i16> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <8 x half> +// CHECK-NEXT: ret <8 x half> [[TMP6]] +// +float16x8_t test_vornq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vornq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vornq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vorrq.c b/clang/test/CodeGen/arm-mve-intrinsics/vorrq.c new file mode 100644 index 0000000000000..436f6277e073f --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vorrq.c @@ -0,0 +1,72 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vorrq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vorrq(a, b); +#else /* POLYMORPHIC */ + return vorrq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float> +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float32x4_t test_vorrq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return vorrq(a, b); +#else /* POLYMORPHIC */ + return vorrq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 
x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vorrq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vorrq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vorrq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[INACTIVE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.arm.mve.orr.predicated.v8i16.v8i1(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i1> [[TMP3]], <8 x i16> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <8 x half> +// CHECK-NEXT: ret <8 x half> [[TMP6]] +// +float16x8_t test_vorrq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vorrq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vorrq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrmulhq.c b/clang/test/CodeGen/arm-mve-intrinsics/vrmulhq.c new file mode 100644 index 0000000000000..2c8148405585b --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vrmulhq.c @@ -0,0 +1,95 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vrmulhq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vrmulh.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +uint8x16_t test_vrmulhq_u8(uint8x16_t a, uint8x16_t b) +{ +#ifdef POLYMORPHIC + return vrmulhq(a, b); +#else /* POLYMORPHIC */ + return vrmulhq_u8(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrmulhq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vrmulh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vrmulhq_s16(int16x8_t a, int16x8_t b) +{ +#ifdef POLYMORPHIC + return vrmulhq(a, b); +#else /* POLYMORPHIC */ + return vrmulhq_s16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrmulhq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vrmulh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vrmulhq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vrmulhq(a, b); +#else /* POLYMORPHIC */ + return vrmulhq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrmulhq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> 
@llvm.arm.mve.rmulh.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrmulhq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrmulhq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vrmulhq_m_s8(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrmulhq_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.rmulh.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrmulhq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrmulhq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vrmulhq_m_u16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrmulhq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.rmulh.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vrmulhq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrmulhq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+ return vrmulhq_m_s32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-neon-vcadd.c b/clang/test/CodeGen/arm-neon-vcadd.c
new file mode 100644
index 0000000000000..00b4641b5a087
--- /dev/null
+++ b/clang/test/CodeGen/arm-neon-vcadd.c
@@ -0,0 +1,54 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang_cc1 -triple armv8.3a-arm-none-eabi -target-cpu generic \
+// RUN: -target-feature +fullfp16 -mfloat-abi soft -S -emit-llvm -o - %s | \
+// RUN: opt -S -sroa -o - | FileCheck %s
+
+#include <arm_neon.h>
+
+void foo16x4_rot90(float16x4_t a, float16x4_t b)
+{
+// CHECK: call <4 x half> @llvm.arm.neon.vcadd.rot90.v4f16
+ float16x4_t result = vcadd_rot90_f16(a, b);
+}
+
+void foo32x2_rot90(float32x2_t a, float32x2_t b)
+{
+// CHECK: call <2 x float> @llvm.arm.neon.vcadd.rot90.v2f32
+ float32x2_t result = vcadd_rot90_f32(a, b);
+}
+
+void foo16x8_rot90(float16x8_t a, float16x8_t b)
+{
+// CHECK: call <8 x half> @llvm.arm.neon.vcadd.rot90.v8f16
+ float16x8_t result = vcaddq_rot90_f16(a, b);
+}
+
+void foo32x4_rot90(float32x4_t a, float32x4_t b)
+{
+// CHECK: call <4 x float> @llvm.arm.neon.vcadd.rot90.v4f32
+ float32x4_t result = vcaddq_rot90_f32(a, b);
+}
+
+void foo16x4_rot270(float16x4_t a, float16x4_t b)
+{
+// CHECK: call <4 x half> @llvm.arm.neon.vcadd.rot270.v4f16
+ float16x4_t result = vcadd_rot270_f16(a, b);
+}
+
+void foo32x2_rot270(float32x2_t a, float32x2_t b)
+{
+// CHECK: call <2 x float> @llvm.arm.neon.vcadd.rot270.v2f32
+ float32x2_t result = vcadd_rot270_f32(a, b);
+}
+
+void foo16x8_rot270(float16x8_t a, float16x8_t b)
+{
+// CHECK: call <8 x half> @llvm.arm.neon.vcadd.rot270.v8f16
+ float16x8_t result = vcaddq_rot270_f16(a, b);
+}
+
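These vcadd tests exercise the Armv8.3-A complex-addition forms: lanes are treated as interleaved (real, imaginary) pairs, and the second operand is rotated by 90 or 270 degrees in the complex plane before the add. A minimal scalar sketch of one lane pair, assuming the usual interleaved layout (the struct and helper are ours, illustrative only):

```c
/* One (re, im) lane pair of vcadd_rot90_f32: rotating b by 90 degrees
 * multiplies it by i, so the result is (a.re - b.im, a.im + b.re).
 * For the rot270 variants the signs flip: (a.re + b.im, a.im - b.re). */
struct cplx { float re, im; };

static inline struct cplx vcadd_rot90_lane(struct cplx a, struct cplx b)
{
    struct cplx r = { a.re - b.im, a.im + b.re };
    return r;
}
```

+void foo32x4_rot270(float32x4_t a, float32x4_t b)
+{
+// CHECK: call <4 x float> @llvm.arm.neon.vcadd.rot270.v4f32
+ float32x4_t result =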
vcaddq_rot270_f32(a, b); +} diff --git a/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c b/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c index 6f5867b6c11f7..5462c17a1cc50 100644 --- a/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c +++ b/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c @@ -13,7 +13,7 @@ // CHECK-LABEL: test_vqrdmlah_s16 int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) +// CHECK-ARM: call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) @@ -23,7 +23,7 @@ int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-LABEL: test_vqrdmlah_s32 int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) { // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) +// CHECK-ARM: call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) @@ -33,7 +33,7 @@ int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) { // CHECK-LABEL: test_vqrdmlahq_s16 int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) +// CHECK-ARM: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) @@ -43,7 +43,7 @@ int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { // CHECK-LABEL: test_vqrdmlahq_s32 int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) +// CHECK-ARM: call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) @@ -54,7 +54,7 @@ int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) +// CHECK-ARM: call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> // CHECK-AARCH64: 
call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) @@ -66,7 +66,7 @@ int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) +// CHECK-ARM: call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) @@ -78,7 +78,7 @@ int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) +// CHECK-ARM: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) @@ -90,7 +90,7 @@ int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) +// CHECK-ARM: call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) @@ -101,7 +101,7 @@ int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { // CHECK-LABEL: test_vqrdmlsh_s16 int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) +// CHECK-ARM: call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) @@ -111,7 +111,7 @@ int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-LABEL: test_vqrdmlsh_s32 int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) { // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) +// CHECK-ARM: call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x 
i32> {{%.*}}, <2 x i32> {{%.*}}) @@ -121,7 +121,7 @@ int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) { // CHECK-LABEL: test_vqrdmlshq_s16 int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) +// CHECK-ARM: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) @@ -131,7 +131,7 @@ int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { // CHECK-LABEL: test_vqrdmlshq_s32 int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) +// CHECK-ARM: call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) @@ -142,7 +142,7 @@ int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) +// CHECK-ARM: call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) @@ -154,7 +154,7 @@ int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) +// CHECK-ARM: call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) @@ -166,7 +166,7 @@ int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) +// CHECK-ARM: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) @@ -178,7 +178,7 @@ 
int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) +// CHECK-ARM: call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) diff --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c index 5c34d1c37de09..9f1a64554155c 100644 --- a/clang/test/CodeGen/arm_neon_intrinsics.c +++ b/clang/test/CodeGen/arm_neon_intrinsics.c @@ -2147,6 +2147,13 @@ int8x8_t test_vcreate_s8(uint64_t a) { return vclz_s8(vcreate_s8(a)); } +// CHECK-LABEL: @test_vcreate_imm +// CHECK: [[RES:%.*]] = bitcast i64 0 to <4 x i16> +// CHECK: ret <4 x i16> [[RES]] +int16x4_t test_vcreate_imm(void) { + return vcreate_s16(0); +} + // CHECK-LABEL: @test_vcreate_s16( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> @@ -9523,7 +9530,7 @@ int32x4_t test_vqabsq_s32(int32x4_t a) { } // CHECK-LABEL: @test_vqadd_s8( -// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQADD_V_I]] int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { return vqadd_s8(a, b); @@ -9532,7 +9539,7 @@ int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQADD_V2_I]] int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { @@ -9542,7 +9549,7 @@ int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQADD_V2_I]] int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { @@ -9552,7 +9559,7 @@ int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqadd_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQADD_V2_I]] int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { @@ -9560,7 +9567,7 @@ int64x1_t 
test_vqadd_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqadd_u8( -// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQADD_V_I]] uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { return vqadd_u8(a, b); @@ -9569,7 +9576,7 @@ uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vqadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQADD_V2_I]] uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { @@ -9579,7 +9586,7 @@ uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vqadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQADD_V2_I]] uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { @@ -9589,7 +9596,7 @@ uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vqadd_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQADD_V2_I]] uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { @@ -9597,7 +9604,7 @@ uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vqaddq_s8( -// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQADDQ_V_I]] int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { return vqaddq_s8(a, b); @@ -9606,7 +9613,7 @@ int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { @@ -9616,7 +9623,7 @@ int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK: 
[[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQADDQ_V2_I]] int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { @@ -9626,7 +9633,7 @@ int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqaddq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { @@ -9634,7 +9641,7 @@ int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqaddq_u8( -// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQADDQ_V_I]] uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { return vqaddq_u8(a, b); @@ -9643,7 +9650,7 @@ uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vqaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { @@ -9653,7 +9660,7 @@ uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vqaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQADDQ_V2_I]] uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { @@ -9663,7 +9670,7 @@ uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vqaddq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { @@ -9675,7 +9682,7 @@ uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) +// CHECK: 
[[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlal_s16(a, b, c); @@ -9686,7 +9693,7 @@ int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlal_s32(a, b, c); @@ -9698,7 +9705,7 @@ int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlal_lane_s16(a, b, c, 3); @@ -9710,7 +9717,7 @@ int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlal_lane_s32(a, b, c, 1); @@ -9725,7 +9732,7 @@ int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> // CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) -// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) +// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V6_I]] int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vqdmlal_n_s16(a, b, c); @@ -9738,7 +9745,7 @@ int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> // CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) -// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) +// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 
x i64> %a, <2 x i64> [[VQDMLAL3_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V4_I]] int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vqdmlal_n_s32(a, b, c); @@ -9749,7 +9756,7 @@ int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlsl_s16(a, b, c); @@ -9760,7 +9767,7 @@ int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlsl_s32(a, b, c); @@ -9772,7 +9779,7 @@ int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlsl_lane_s16(a, b, c, 3); @@ -9784,7 +9791,7 @@ int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlsl_lane_s32(a, b, c, 1); @@ -9799,7 +9806,7 @@ int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> // CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) -// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) +// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V6_I]] 
int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vqdmlsl_n_s16(a, b, c); @@ -9812,7 +9819,7 @@ int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> // CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) -// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) +// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V4_I]] int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vqdmlsl_n_s32(a, b, c); @@ -10961,7 +10968,7 @@ uint32x2_t test_vqshrun_n_s64(int64x2_t a) { } // CHECK-LABEL: @test_vqsub_s8( -// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQSUB_V_I]] int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { return vqsub_s8(a, b); @@ -10970,7 +10977,7 @@ int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqsub_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSUB_V2_I]] int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { @@ -10980,7 +10987,7 @@ int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqsub_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSUB_V2_I]] int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { @@ -10990,7 +10997,7 @@ int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqsub_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSUB_V2_I]] int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { @@ -10998,7 +11005,7 @@ int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqsub_u8( -// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQSUB_V_I]] uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { return vqsub_u8(a, b); @@ -11007,7 +11014,7 @@ uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vqsub_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSUB_V2_I]] uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { @@ -11017,7 +11024,7 @@ uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vqsub_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSUB_V2_I]] uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { @@ -11027,7 +11034,7 @@ uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vqsub_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSUB_V2_I]] uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { @@ -11035,7 +11042,7 @@ uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vqsubq_s8( -// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { return vqsubq_s8(a, b); @@ -11044,7 +11051,7 @@ int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqsubq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { @@ -11054,7 +11061,7 @@ int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqsubq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { @@ -11064,7 +11071,7 @@ int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqsubq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> 
@llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { @@ -11072,7 +11079,7 @@ int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqsubq_u8( -// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { return vqsubq_u8(a, b); @@ -11081,7 +11088,7 @@ uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vqsubq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { @@ -11091,7 +11098,7 @@ uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vqsubq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { @@ -11101,7 +11108,7 @@ uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vqsubq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { diff --git a/clang/test/CodeGen/builtins-mips-args.c b/clang/test/CodeGen/builtins-mips-args.c index cdb42af4a53d1..a135848805aaf 100644 --- a/clang/test/CodeGen/builtins-mips-args.c +++ b/clang/test/CodeGen/builtins-mips-args.c @@ -1,5 +1,6 @@ // REQUIRES: mips-registered-target -// RUN: %clang_cc1 -triple mips-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple mips-unknown-linux-gnu -target-feature +dspr2 \ +// RUN: -fsyntax-only -verify %s void foo() { // MIPS DSP Rev 1 diff --git a/clang/test/CodeGen/builtins-mips.c b/clang/test/CodeGen/builtins-mips.c index c6be896e81928..d26f630c35d7d 100644 --- a/clang/test/CodeGen/builtins-mips.c +++ b/clang/test/CodeGen/builtins-mips.c @@ -1,5 +1,6 @@ // REQUIRES: mips-registered-target -// RUN: %clang_cc1 -triple mips-unknown-linux-gnu -emit-llvm %s -o - \ +// RUN: %clang_cc1 -triple mips-unknown-linux-gnu -emit-llvm %s \ +// RUN: -target-feature +dspr2 -o - \ // RUN: | FileCheck %s typedef 
int q31; diff --git a/clang/test/CodeGen/catch-implicit-conversions-basics-negatives.c b/clang/test/CodeGen/catch-implicit-conversions-basics-negatives.c new file mode 100644 index 0000000000000..2e060cfcddef3 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-conversions-basics-negatives.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// If we have an enum, it will be promoted to an unsigned integer. +// But both types are unsigned and have the same bitwidth, +// so we should not emit any sanitization. Also, for inc/dec we currently +// assume (and assert) that at least one of the types involved is signed, +// which isn't the case here. +typedef enum { a } b; +b t0(b c) { + c--; + return c; +} diff --git a/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c b/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c new file mode 100644 index 0000000000000..e97a72cb0a339 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 }
+// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed 
char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c new file mode 100644 index 0000000000000..5e0aa1108dfc9 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ 
{{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c new file mode 100644 index 0000000000000..e97a72cb0a339 --- /dev/null +++ 
b/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed 
short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c new file mode 100644 index 0000000000000..93495b331b9f8 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-recover=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* 
@[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* 
@[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c new file mode 100644 index 0000000000000..41e08ee32a525 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c @@ -0,0 +1,307 @@ +// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE + +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fno-sanitize-recover=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-recover=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-trap=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, {{.*}}* @[[INT]], {{.*}}* 
@[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } + +unsigned short t0(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t0( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t0( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 100 + return x++; +} +unsigned short t1(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t1( +// CHECK-NOSANITIZE-NEXT: entry: +// 
CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t1( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 200 + return x--; +} + +unsigned short t2(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t2( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t2( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 
[[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 300 + return ++x; +} + +unsigned short t3(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t3( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t3( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] 
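+  // The #line directive below pins the reported source location, so the
+  // line/column encoded in the @[[LINE_400]] handler data stays stable.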
+#line 400 + return --x; +} + +signed short t4(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t4( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t4( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 500 + return x++; +} +signed short t5(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t5( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t5( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = 
trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 600 + return x--; +} + +signed short t6(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t6( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t6( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, 
!nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 700 + return ++x; +} + +signed short t7(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t7( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t7( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 800 + return --x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c new file mode 100644 index 0000000000000..6ac2be6d9fd0c --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation 
-fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void 
@__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c new file mode 100644 index 0000000000000..b7e438c7229ce --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* 
@[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* 
@[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c new file mode 100644 index 0000000000000..1e0bad1844c50 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c @@ -0,0 +1,303 @@ +// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE + +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fno-sanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-trap=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// 
CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } + +unsigned short t0(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t0( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t0( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 100 + return x++; +} +unsigned short t1(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t1( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: 
[[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t1( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 200 + return x--; +} + +unsigned short t2(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t2( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t2( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: 
[[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 300 + return ++x; +} + +unsigned short t3(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t3( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t3( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 400 + return --x; +} + +signed short t4(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t4( +// 
CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t4( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 500 + return x++; +} +signed short t5(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t5( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t5( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 
[[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 600 + return x--; +} + +signed short t6(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t6( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t6( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* 
[[X_ADDR]], align 2
+// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]]
+#line 700
+  return ++x;
+}
+
+signed short t7(signed short x) {
+// CHECK-NOSANITIZE-LABEL: @t7(
+// CHECK-NOSANITIZE-NEXT: entry:
+// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2
+// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2
+// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1
+// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2
+// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]]
+//
+// CHECK-SANITIZE-LABEL: @t7(
+// CHECK-SANITIZE-NEXT: entry:
+// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2
+// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2
+// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2
+// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32
+// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1
+// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16
+// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize
+// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize
+// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize
+// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]:
+// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize
+// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize
+// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize
+// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize
+// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize
+// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize
+// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize
+// CHECK-SANITIZE: [[CONT]]:
+// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2
+// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]]
+#line 800
+  return --x;
+}
diff --git a/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c
new file mode 100644
index 0000000000000..7ad12314f3df0
--- /dev/null
+++ b/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c
@@ -0,0 +1,101 @@
+// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation -fsanitize-recover=implicit-unsigned-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK
+
+// CHECK-LABEL: @t0(
+unsigned short t0(unsigned short x) {
+#line 100
+  x++;
+  return x;
+}
+// CHECK-LABEL: @t1(
+unsigned short t1(unsigned short x) {
+#line 200
+  x--;
+  return x;
+}
+// CHECK-LABEL: @t2(
+unsigned short t2(unsigned short x) {
+#line 300
+  ++x;
+  return x;
+}
+// CHECK-LABEL: @t3(
+unsigned short t3(unsigned short x) {
+#line 400
+  --x;
+  return x;
+}
+
+// CHECK-LABEL: @t4(
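+// Note: ++/-- first promote the operand to (signed) int, so the implicit
+// truncation back to the operand's type is always a signed truncation.
+// With only -fsanitize=implicit-unsigned-integer-truncation enabled, no
+// checks are expected for any function in this file; the RUN line's
+// -implicit-check-not enforces the absence of handler calls.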
+signed short t4(signed short x) { +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/debug-prefix-map.c b/clang/test/CodeGen/debug-prefix-map.c index d6032a658c2e2..5366e19447ae2 100644 --- a/clang/test/CodeGen/debug-prefix-map.c +++ b/clang/test/CodeGen/debug-prefix-map.c @@ -2,6 +2,8 @@ // RUN: %clang_cc1 -debug-info-kind=standalone -fdebug-prefix-map=%p=/UNLIKELY_PATH=empty %s -emit-llvm -o - | FileCheck %s -check-prefix CHECK-EVIL // RUN: %clang_cc1 -debug-info-kind=standalone -fdebug-prefix-map=%p=/UNLIKELY_PATH/empty %s -emit-llvm -o - -main-file-name debug-prefix-map.c | FileCheck %s // RUN: %clang_cc1 -debug-info-kind=standalone -fdebug-prefix-map=%p=/UNLIKELY_PATH/empty %s -emit-llvm -o - -fdebug-compilation-dir %p | FileCheck %s -check-prefix CHECK-COMPILATION-DIR +// RUN: %clang -g -fdebug-prefix-map=%p=/UNLIKELY_PATH/empty -S -c %s -emit-llvm -o - | FileCheck %s +// RUN: %clang -g -ffile-prefix-map=%p=/UNLIKELY_PATH/empty -S -c %s -emit-llvm -o - | FileCheck %s #include "Inputs/stdio.h" @@ -17,21 +19,21 @@ void test_rewrite_includes() { } // CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\\\}}" -// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{[/\\]}}{{.*}}", +// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\\\}}{{.*}}", // On POSIX systems "Dir" should actually be empty, but on Windows we // can't recognize "/UNLIKELY_PATH" as being an absolute path. 
// CHECK-NO-MAIN-FILE-NAME-SAME: directory: "{{()|(.*:.*)}}") -// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{[/\\]}}Inputs/stdio.h", +// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\\\}}Inputs/stdio.h", // CHECK-NO-MAIN-FILE-NAME-SAME: directory: "{{()|(.*:.*)}}") // CHECK-NO-MAIN-FILE-NAME-NOT: !DIFile(filename: -// CHECK-EVIL: !DIFile(filename: "/UNLIKELY_PATH=empty{{[/\\]}}{{.*}}" -// CHECK-EVIL: !DIFile(filename: "/UNLIKELY_PATH=empty{{[/\\]}}{{.*}}Inputs/stdio.h", +// CHECK-EVIL: !DIFile(filename: "/UNLIKELY_PATH=empty{{/|\\\\}}{{.*}}" +// CHECK-EVIL: !DIFile(filename: "/UNLIKELY_PATH=empty{{/|\\\\}}{{.*}}Inputs/stdio.h", // CHECK-EVIL-SAME: directory: "{{()|(.*:.*)}}") // CHECK-EVIL-NOT: !DIFile(filename: -// CHECK: !DIFile(filename: "/UNLIKELY_PATH/empty{{[/\\]}}{{.*}}" -// CHECK: !DIFile(filename: "/UNLIKELY_PATH/empty{{[/\\]}}{{.*}}Inputs/stdio.h", +// CHECK: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\\\}}{{.*}}" +// CHECK: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\\\}}{{.*}}Inputs/stdio.h", // CHECK-SAME: directory: "{{()|(.*:.*)}}") // CHECK-NOT: !DIFile(filename: diff --git a/clang/test/CodeGen/fpconstrained.c b/clang/test/CodeGen/fpconstrained.c new file mode 100644 index 0000000000000..0a890e2e702eb --- /dev/null +++ b/clang/test/CodeGen/fpconstrained.c @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -ftrapping-math -frounding-math -ffp-exception-behavior=strict -emit-llvm -o - %s | FileCheck %s -check-prefix=FPMODELSTRICT +// RUN: %clang_cc1 -ffp-contract=fast -emit-llvm -o - %s | FileCheck %s -check-prefix=PRECISE +// RUN: %clang_cc1 -ffast-math -ffp-contract=fast -emit-llvm -o - %s | FileCheck %s -check-prefix=FAST +// RUN: %clang_cc1 -ffast-math -emit-llvm -o - %s | FileCheck %s -check-prefix=FAST +// RUN: %clang_cc1 -ffast-math -ffp-contract=fast -ffp-exception-behavior=ignore -emit-llvm -o - %s | FileCheck %s -check-prefix=FAST +// RUN: %clang_cc1 -ffast-math -ffp-contract=fast -ffp-exception-behavior=strict -emit-llvm -o - %s | FileCheck %s -check-prefix=EXCEPT +// RUN: %clang_cc1 -ffast-math -ffp-contract=fast -ffp-exception-behavior=maytrap -emit-llvm -o - %s | FileCheck %s -check-prefix=MAYTRAP +float f0, f1, f2; + +void foo() { + // CHECK-LABEL: define {{.*}}void @foo() + + // MAYTRAP: llvm.experimental.constrained.fadd.f32(float %{{.*}}, float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // EXCEPT: llvm.experimental.constrained.fadd.f32(float %{{.*}}, float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // FPMODELSTRICT: llvm.experimental.constrained.fadd.f32(float %{{.*}}, float %{{.*}}, metadata !"round.dynamic", metadata !"fpexcept.strict") + // STRICTEXCEPT: llvm.experimental.constrained.fadd.f32(float %{{.*}}, float %{{.*}}, metadata !"round.dynamic", metadata !"fpexcept.strict") + // STRICTNOEXCEPT: llvm.experimental.constrained.fadd.f32(float %{{.*}}, float %{{.*}}, metadata !"round.dynamic", metadata !"fpexcept.ignore") + // PRECISE: fadd contract float %{{.*}}, %{{.*}} + // FAST: fadd fast + f0 = f1 + f2; + + // CHECK: ret +} diff --git a/clang/test/CodeGen/fpconstrained.cpp b/clang/test/CodeGen/fpconstrained.cpp new file mode 100644 index 0000000000000..7aa34c98a4879 --- /dev/null +++ b/clang/test/CodeGen/fpconstrained.cpp @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -x c++ -ftrapping-math -fexceptions -fcxx-exceptions -frounding-math -ffp-exception-behavior=strict -emit-llvm -o - %s | FileCheck %s -check-prefix=FPMODELSTRICT +// RUN: %clang_cc1 -x c++ -ffp-contract=fast 
-fexceptions -fcxx-exceptions -emit-llvm -o - %s | FileCheck %s -check-prefix=PRECISE +// RUN: %clang_cc1 -x c++ -ffast-math -fexceptions -fcxx-exceptions -ffp-contract=fast -emit-llvm -o - %s | FileCheck %s -check-prefix=FAST +// RUN: %clang_cc1 -x c++ -ffast-math -fexceptions -fcxx-exceptions -emit-llvm -o - %s | FileCheck %s -check-prefix=FAST +// RUN: %clang_cc1 -x c++ -ffast-math -fexceptions -fcxx-exceptions -ffp-contract=fast -ffp-exception-behavior=ignore -emit-llvm -o - %s | FileCheck %s -check-prefix=FAST +// RUN: %clang_cc1 -x c++ -ffast-math -fexceptions -fcxx-exceptions -ffp-contract=fast -ffp-exception-behavior=strict -emit-llvm -o - %s | FileCheck %s -check-prefix=EXCEPT +// RUN: %clang_cc1 -x c++ -ffast-math -fexceptions -fcxx-exceptions -ffp-contract=fast -ffp-exception-behavior=maytrap -emit-llvm -o - %s | FileCheck %s -check-prefix=MAYTRAP +float f0, f1, f2; + + template <class> + class aaaa { + public: + ~aaaa(); + void b(); + }; + + template <class c> + aaaa<c>::~aaaa() { try { + b(); + // CHECK-LABEL: define {{.*}}void @_ZN4aaaaIiED2Ev{{.*}} + + } catch (...) { + // MAYTRAP: llvm.experimental.constrained.fadd.f32(float %{{.*}}, float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // EXCEPT: llvm.experimental.constrained.fadd.f32(float %{{.*}}, float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + // FPMODELSTRICT: llvm.experimental.constrained.fadd.f32(float %{{.*}}, float %{{.*}}, metadata !"round.dynamic", metadata !"fpexcept.strict") + // STRICTEXCEPT: llvm.experimental.constrained.fadd.f32(float %{{.*}}, float %{{.*}}, metadata !"round.dynamic", metadata !"fpexcept.strict") + // STRICTNOEXCEPT: llvm.experimental.constrained.fadd.f32(float %{{.*}}, float %{{.*}}, metadata !"round.dynamic", metadata !"fpexcept.ignore") + // PRECISE: fadd contract float %{{.*}}, %{{.*}} + // FAST: fadd fast + f0 = f1 + f2; + + // CHECK: ret void + } + } + + class d { + public: + d(const char *, int); + aaaa<int> e; + }; + +float foo() { + d x("", 1); + aaaa<int> a; + return f0; +} + diff --git a/clang/test/CodeGen/label-array-aggregate-init.c b/clang/test/CodeGen/label-array-aggregate-init.c new file mode 100644 index 0000000000000..5cefd8d270c08 --- /dev/null +++ b/clang/test/CodeGen/label-array-aggregate-init.c @@ -0,0 +1,10 @@ +// RUN: %clang -cc1 -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck %s + +// CHECK: @constinit = private global [3 x i8*] [i8* blockaddress(@main, %L), i8* null, i8* null] + +void receivePtrs(void **); + +int main() { +L: + receivePtrs((void *[]){ &&L, 0, 0 }); +} diff --git a/clang/test/CodeGenCXX/constructor-destructor-return-this.cpp b/clang/test/CodeGenCXX/constructor-destructor-return-this.cpp index f6450e2d4d77d..7ef9e116df8b8 100644 --- a/clang/test/CodeGenCXX/constructor-destructor-return-this.cpp +++ b/clang/test/CodeGenCXX/constructor-destructor-return-this.cpp @@ -3,6 +3,8 @@ //RUN: %clang_cc1 %s -emit-llvm -o - -triple=thumbv7-apple-ios5.0 -target-abi apcs-gnu | FileCheck --check-prefix=CHECKIOS5 %s //RUN: %clang_cc1 %s -emit-llvm -o - -triple=wasm32-unknown-unknown \ //RUN: | FileCheck --check-prefix=CHECKARM %s +//RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64-unknown-fuchsia | FileCheck --check-prefix=CHECKFUCHSIA %s +//RUN: %clang_cc1 %s -emit-llvm -o - -triple=aarch64-unknown-fuchsia | FileCheck --check-prefix=CHECKFUCHSIA %s //RUN: %clang_cc1 %s -emit-llvm -o - -triple=i386-pc-win32 -fno-rtti | FileCheck --check-prefix=CHECKMS %s // FIXME: these tests crash on the bots when run with -triple=x86_64-pc-win32 @@
-45,6 +47,11 @@ B::~B() { } // CHECKIOS5-LABEL: define %class.B* @_ZN1BD2Ev(%class.B* %this) // CHECKIOS5-LABEL: define %class.B* @_ZN1BD1Ev(%class.B* %this) +// CHECKFUCHSIA-LABEL: define %class.B* @_ZN1BC2EPi(%class.B* returned %this, i32* %i) +// CHECKFUCHSIA-LABEL: define %class.B* @_ZN1BC1EPi(%class.B* returned %this, i32* %i) +// CHECKFUCHSIA-LABEL: define %class.B* @_ZN1BD2Ev(%class.B* returned %this) +// CHECKFUCHSIA-LABEL: define %class.B* @_ZN1BD1Ev(%class.B* returned %this) + // CHECKMS-LABEL: define dso_local x86_thiscallcc %class.B* @"??0B@@QAE@PAH@Z"(%class.B* returned %this, i32* %i) // CHECKMS-LABEL: define dso_local x86_thiscallcc void @"??1B@@UAE@XZ"(%class.B* %this) @@ -83,6 +90,14 @@ C::~C() { } // CHECKIOS5-LABEL: define void @_ZN1CD0Ev(%class.C* %this) // CHECKIOS5-LABEL: define void @_ZThn8_N1CD0Ev(%class.C* %this) +// CHECKFUCHSIA-LABEL: define %class.C* @_ZN1CC2EPiPc(%class.C* returned %this, i32* %i, i8* %c) +// CHECKFUCHSIA-LABEL: define %class.C* @_ZN1CC1EPiPc(%class.C* returned %this, i32* %i, i8* %c) +// CHECKFUCHSIA-LABEL: define %class.C* @_ZN1CD2Ev(%class.C* returned %this) +// CHECKFUCHSIA-LABEL: define %class.C* @_ZN1CD1Ev(%class.C* returned %this) +// CHECKFUCHSIA-LABEL: define %class.C* @_ZThn16_N1CD1Ev(%class.C* %this) +// CHECKFUCHSIA-LABEL: define void @_ZN1CD0Ev(%class.C* %this) +// CHECKFUCHSIA-LABEL: define void @_ZThn16_N1CD0Ev(%class.C* %this) + // CHECKMS-LABEL: define dso_local x86_thiscallcc %class.C* @"??0C@@QAE@PAHPAD@Z"(%class.C* returned %this, i32* %i, i8* %c) // CHECKMS-LABEL: define dso_local x86_thiscallcc void @"??1C@@UAE@XZ"(%class.C* %this) @@ -110,6 +125,11 @@ D::~D() { } // CHECKIOS5-LABEL: define %class.D* @_ZN1DD2Ev(%class.D* %this, i8** %vtt) // CHECKIOS5-LABEL: define %class.D* @_ZN1DD1Ev(%class.D* %this) +// CHECKFUCHSIA-LABEL: define %class.D* @_ZN1DC2Ev(%class.D* returned %this, i8** %vtt) +// CHECKFUCHSIA-LABEL: define %class.D* @_ZN1DC1Ev(%class.D* returned %this) +// CHECKFUCHSIA-LABEL: define %class.D* @_ZN1DD2Ev(%class.D* returned %this, i8** %vtt) +// CHECKFUCHSIA-LABEL: define %class.D* @_ZN1DD1Ev(%class.D* returned %this) + // CHECKMS-LABEL: define dso_local x86_thiscallcc %class.D* @"??0D@@QAE@XZ"(%class.D* returned %this, i32 %is_most_derived) // CHECKMS-LABEL: define dso_local x86_thiscallcc void @"??1D@@UAE@XZ"(%class.D* %this) @@ -127,15 +147,15 @@ void test_destructor() { e2->~E(); } -// CHECKARM-LABEL: define void @_Z15test_destructorv() +// CHECKARM-LABEL,CHECKFUCHSIA-LABEL: define void @_Z15test_destructorv() // Verify that virtual calls to destructors are not marked with a 'returned' // this parameter at the call site... 
-// CHECKARM: [[VFN:%.*]] = getelementptr inbounds %class.E* (%class.E*)*, %class.E* (%class.E*)** -// CHECKARM: [[THUNK:%.*]] = load %class.E* (%class.E*)*, %class.E* (%class.E*)** [[VFN]] -// CHECKARM: call %class.E* [[THUNK]](%class.E* % +// CHECKARM,CHECKFUCHSIA: [[VFN:%.*]] = getelementptr inbounds %class.E* (%class.E*)*, %class.E* (%class.E*)** +// CHECKARM,CHECKFUCHSIA: [[THUNK:%.*]] = load %class.E* (%class.E*)*, %class.E* (%class.E*)** [[VFN]] +// CHECKARM,CHECKFUCHSIA: call %class.E* [[THUNK]](%class.E* % // ...but static calls create declarations with 'returned' this -// CHECKARM: {{%.*}} = call %class.E* @_ZN1ED1Ev(%class.E* % +// CHECKARM,CHECKFUCHSIA: {{%.*}} = call %class.E* @_ZN1ED1Ev(%class.E* % -// CHECKARM: declare %class.E* @_ZN1ED1Ev(%class.E* returned) +// CHECKARM,CHECKFUCHSIA: declare %class.E* @_ZN1ED1Ev(%class.E* returned) diff --git a/clang/test/CodeGenCXX/debug-info-template-align.cpp b/clang/test/CodeGenCXX/debug-info-template-align.cpp new file mode 100644 index 0000000000000..42fdb269a30b5 --- /dev/null +++ b/clang/test/CodeGenCXX/debug-info-template-align.cpp @@ -0,0 +1,14 @@ +// Test for debug info related to DW_AT_alignment attribute in the typedef operator +// Supported: -O0, standalone DI +// RUN: %clang_cc1 -dwarf-version=5 -emit-llvm -triple x86_64-linux-gnu %s -o - \ +// RUN: -O0 -disable-llvm-passes \ +// RUN: -debug-info-kind=standalone \ +// RUN: | FileCheck %s + +// CHECK: DIDerivedType(tag: DW_TAG_typedef, {{.*}}, align: 512 + +typedef char __attribute__((__aligned__(64))) alchar; + +int main() { + alchar newChar; +} diff --git a/clang/test/CodeGenCXX/dllexport-dtor-thunks.cpp b/clang/test/CodeGenCXX/dllexport-dtor-thunks.cpp index bda126eba855d..d2aa195e8cc3a 100644 --- a/clang/test/CodeGenCXX/dllexport-dtor-thunks.cpp +++ b/clang/test/CodeGenCXX/dllexport-dtor-thunks.cpp @@ -1,5 +1,12 @@ // RUN: %clang_cc1 -mconstructor-aliases -fms-extensions %s -emit-llvm -o - -triple x86_64-windows-msvc | FileCheck %s +namespace test1 { +struct A { ~A(); }; +struct __declspec(dllexport) B : virtual A { }; +// CHECK: define weak_odr dso_local dllexport void @"??1B@test1@@QEAA@XZ" +// CHECK: define weak_odr dso_local dllexport void @"??_DB@test1@@QEAAXXZ" +} + struct __declspec(dllexport) A { virtual ~A(); }; struct __declspec(dllexport) B { virtual ~B(); }; struct __declspec(dllexport) C : A, B { virtual ~C(); }; diff --git a/clang/test/CodeGenCXX/dllexport.cpp b/clang/test/CodeGenCXX/dllexport.cpp index 6c4077a5b9a7c..045cb450b7506 100644 --- a/clang/test/CodeGenCXX/dllexport.cpp +++ b/clang/test/CodeGenCXX/dllexport.cpp @@ -860,6 +860,20 @@ struct PR40006 { }; // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %"struct.InClassInits::PR40006"* @"??0PR40006@InClassInits@@QAE@XZ" +namespace pr40006 { +// Delay emitting the method also past the instantiation of Tmpl<Inner>, i.e. +// until the top-level class Outer is completely finished. +template <typename> struct Tmpl {}; +struct Outer { + struct Inner { + __declspec(dllexport) Inner() = default; + unsigned int x = 0; + }; + Tmpl<Inner> y; +}; +// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc %"struct.InClassInits::pr40006::Outer::Inner"* @"??0Inner@Outer@pr40006@InClassInits@@QAE@XZ" +} + // PR42857: Clang would try to emit the non-trivial explicitly defaulted // dllexport ctor twice when doing an explicit instantiation definition.
struct Qux { Qux(); }; diff --git a/clang/test/CodeGenCXX/dllimport-dtor-thunks.cpp b/clang/test/CodeGenCXX/dllimport-dtor-thunks.cpp index da3227a49a4b5..53aa2cdbf3eef 100644 --- a/clang/test/CodeGenCXX/dllimport-dtor-thunks.cpp +++ b/clang/test/CodeGenCXX/dllimport-dtor-thunks.cpp @@ -19,9 +19,9 @@ struct __declspec(dllimport) ImportOverrideVDtor : public BaseClass { virtual ~ImportOverrideVDtor() {} }; -// Virtually inherits from a non-dllimport base class. This time we need to call -// the complete destructor and emit it inline. It's not exported from the DLL, -// and it must be emitted. +// Virtually inherits from a non-dllimport base class. In this case, we can +// expect the DLL to provide a definition of the complete dtor. See +// dllexport-dtor-thunks.cpp. struct __declspec(dllimport) ImportVBaseOverrideVDtor : public virtual BaseClass { virtual ~ImportVBaseOverrideVDtor() {} diff --git a/clang/test/CodeGenCXX/no-unique-address-2.cpp b/clang/test/CodeGenCXX/no-unique-address-2.cpp new file mode 100644 index 0000000000000..aa0c67758a192 --- /dev/null +++ b/clang/test/CodeGenCXX/no-unique-address-2.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -std=c++2a %s -emit-llvm -o - -triple x86_64-linux-gnu | FileCheck %s + +struct TriviallyCopyable {}; + +struct NonTriviallyCopyable { + NonTriviallyCopyable() = default; + NonTriviallyCopyable(const NonTriviallyCopyable&) = default; + NonTriviallyCopyable(NonTriviallyCopyable &&) {} +}; + +struct Foo { + int i; + [[no_unique_address]] TriviallyCopyable m; + [[no_unique_address]] NonTriviallyCopyable n; +}; + +void call() { + Foo foo; + Foo foo2(static_cast<Foo &&>(foo)); +} + +// The memcpy call should copy exactly 4 bytes for member 'int i' +// CHECK: define {{.*}} void @_ZN3FooC2EOS_ +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.+}}, i8* {{.+}}, i64 4, i1 false) +// CHECK: call void @_ZN20NonTriviallyCopyableC2EOS_ diff --git a/clang/test/CodeGenObjC/nontrivial-struct-param-init.m b/clang/test/CodeGenObjC/nontrivial-struct-param-init.m new file mode 100644 index 0000000000000..96a63b83ac761 --- /dev/null +++ b/clang/test/CodeGenObjC/nontrivial-struct-param-init.m @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -triple i386-apple-watchos6.0-simulator -emit-llvm -fblocks -fobjc-arc -o - %s | FileCheck %s + +// CHECK: %[[STRUCT_S:.*]] = type { i8* } + +typedef struct { + id x; +} S; + +// CHECK: define void @test0(i8* %[[A_0:.*]]) +// CHECK: %[[A:.*]] = alloca %[[STRUCT_S]], align 4 +// CHECK: %[[X:.*]] = getelementptr inbounds %[[STRUCT_S]], %[[STRUCT_S]]* %[[A]], i32 0, i32 0 +// CHECK: store i8* %[[A_0]], i8** %[[X]], align 4 +// CHECK: %[[V0:.*]] = bitcast %[[STRUCT_S]]* %[[A]] to i8** +// CHECK: call void @__destructor_4_s0(i8** %[[V0]]) #2 + +void test0(S a) { +} diff --git a/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl b/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl index 7e3186b186152..64f2d89c5818a 100644 --- a/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl +++ b/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl @@ -16,6 +16,13 @@ kernel void test_pure_attr(read_only image1d_t img) { float4 resf = read_imagef(img, 42); } +// Test that builtins with only one prototype are mangled.
+// CHECK-LABEL: @test_mangling +// CHECK: call i32 @_Z12get_local_idj +kernel void test_mangling() { + size_t lid = get_local_id(0); +} + // CHECK: attributes [[ATTR_CONST]] = // CHECK-SAME: readnone // CHECK: attributes [[ATTR_PURE]] = diff --git a/clang/test/CodeGenOpenCLCXX/addrspace-derived-base.cl b/clang/test/CodeGenOpenCLCXX/addrspace-derived-base.cl index d5d369fa80bb6..623d201c21800 100644 --- a/clang/test/CodeGenOpenCLCXX/addrspace-derived-base.cl +++ b/clang/test/CodeGenOpenCLCXX/addrspace-derived-base.cl @@ -69,3 +69,14 @@ void pr43145_3(int n) { // CHECK: bitcast i8 addrspace(4)* %add.ptr1 to %class.B2 addrspace(4)* // CHECK: call {{.*}} @_ZNU3AS42B26getRefEv } + +// Implicit conversion of derived to base. + +void functionWithBaseArgPtr(class B2 *b) {} +void functionWithBaseArgRef(class B2 &b) {} + +void pr43145_4() { + Derived d; + functionWithBaseArgPtr(&d); + functionWithBaseArgRef(d); +} diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 2b5def6c62a98..a57fbf7e74e52 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -137,7 +137,7 @@ class accessor { _ImplT<dimensions> impl; private: - void __init(__attribute__((ocl_global)) dataT *Ptr, range<dimensions> AccessRange, + void __init(__attribute__((opencl_global)) dataT *Ptr, range<dimensions> AccessRange, range<dimensions> MemRange, id<dimensions> Offset) {} }; diff --git a/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp b/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp index 1d5beced187bd..4697c57363bb4 100644 --- a/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp +++ b/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp @@ -3,7 +3,7 @@ void bar(int & Data) {} // CHECK-DAG: define spir_func void @[[RAW_REF:[a-zA-Z0-9_]+]](i32 addrspace(4)* dereferenceable(4) % void bar2(int & Data) {} // CHECK-DAG: define spir_func void @[[RAW_REF2:[a-zA-Z0-9_]+]](i32 addrspace(4)* dereferenceable(4) % -void bar(__attribute__((ocl_local)) int &Data) {} +void bar(__attribute__((opencl_local)) int &Data) {} // CHECK-DAG: define spir_func void [[LOC_REF:@[a-zA-Z0-9_]+]](i32 addrspace(3)* dereferenceable(4) % void foo(int * Data) {} // CHECK-DAG: define spir_func void @[[RAW_PTR:[a-zA-Z0-9_]+]](i32 addrspace(4)* % @@ -20,12 +20,12 @@ void usages() { // CHECK-DAG: [[GLOB:%[a-zA-Z0-9]+]] = alloca i32 addrspace(1)* __attribute__((address_space(1))) int *GLOB; // CHECK-DAG: [[LOC:%[a-zA-Z0-9]+]] = alloca i32 addrspace(3)* - __attribute__((ocl_local)) int *LOC; + __attribute__((opencl_local)) int *LOC; // CHECK-DAG: [[NoAS:%[a-zA-Z0-9]+]] = alloca i32 addrspace(4)* int *NoAS; // CHECK-DAG: [[PRIV:%[a-zA-Z0-9]+]] = alloca i32* - __attribute__((ocl_private)) int *PRIV; + __attribute__((opencl_private)) int *PRIV; bar(*GLOB); // CHECK-DAG: [[GLOB_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[GLOB]] @@ -98,19 +98,19 @@ void usages2() { // CHECK-DAG: [[PRIV_NUM:%[a-zA-Z0-9_]+]] = alloca i32* __attribute__((address_space(0))) int *PRIV_NUM2; // CHECK-DAG: [[PRIV_NUM2:%[a-zA-Z0-9_]+]] = alloca i32* - __attribute__((ocl_private)) int *PRIV; + __attribute__((opencl_private)) int *PRIV; // CHECK-DAG: [[PRIV:%[a-zA-Z0-9_]+]] = alloca i32* __attribute__((address_space(1))) int *GLOB_NUM; // CHECK-DAG: [[GLOB_NUM:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(1)* - __attribute__((ocl_global)) int *GLOB; + __attribute__((opencl_global)) int *GLOB; // CHECK-DAG: [[GLOB:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(1)* __attribute__((address_space(2))) int
*CONST_NUM; // CHECK-DAG: [[CONST_NUM:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(2)* - __attribute__((ocl_constant)) int *CONST; + __attribute__((opencl_constant)) int *CONST; // CHECK-DAG: [[CONST:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(2)* __attribute__((address_space(3))) int *LOCAL_NUM; // CHECK-DAG: [[LOCAL_NUM:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(3)* - __attribute__((ocl_local)) int *LOCAL; + __attribute__((opencl_local)) int *LOCAL; // CHECK-DAG: [[LOCAL:%[a-zA-Z0-9_]+]] = alloca i32 addrspace(3)* bar(*PRIV_NUM); diff --git a/clang/test/CoverageMapping/switch.cpp b/clang/test/CoverageMapping/switch.cpp index 30c64922201f4..25ea4053f4e2c 100644 --- a/clang/test/CoverageMapping/switch.cpp +++ b/clang/test/CoverageMapping/switch.cpp @@ -2,11 +2,11 @@ // CHECK: foo void foo(int i) { // CHECK-NEXT: File 0, [[@LINE]]:17 -> [[@LINE+8]]:2 = #0 - switch(i) { + switch(i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE+4]]:10 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:11 = #2 return; case 2: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:10 = #3 - break; // CHECK-NEXT: File 0, [[@LINE]]:10 -> [[@LINE+2]]:3 = #1 + break; // CHECK-NEXT: Gap,File 0, [[@LINE]]:10 -> [[@LINE+2]]:3 = #1 } int x = 0; // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:2 = #1 } @@ -29,7 +29,7 @@ void bar(int i) { // CHECK-NEXT: File 0, [[@LINE]]:17 -> [[@LINE+20]]:2 = #0 nop(); switch (i) { // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+6]]:2 = #4 - nop(); // CHECK-NEXT: File 0, [[@LINE]]:5 -> [[@LINE+2]]:10 = 0 + nop(); // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:14 -> [[@LINE+2]]:10 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:10 = #7 nop(); } @@ -47,7 +47,7 @@ void baz() { // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+5]]:2 = #0 // CHECK-NEXT: main int main() { // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+35]]:2 = #0 int i = 0; - switch(i) { + switch(i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE+8]]:10 = 0 case 0: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:10 = #2 i = 1; break; @@ -58,16 +58,16 @@ int main() { // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+35]]:2 = #0 break; // CHECK-NEXT: File 0, [[@LINE]]:10 -> [[@LINE+2]]:3 = #1 } switch(i) { // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+23]]:2 = #1 - case 0: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:10 = #6 - i = 1; + case 0: // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:13 -> [[@LINE+6]]:10 = 0 + i = 1; // CHECK-NEXT: File 0, [[@LINE-1]]:3 -> [[@LINE+1]]:10 = #6 break; case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+3]]:10 = #7 i = 2; default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:10 = (#7 + #8) break; // CHECK-NEXT: File 0, [[@LINE]]:10 -> [[@LINE+3]]:3 = #5 } - - switch(i) { // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+13]]:2 = #5 + // CHECK-NEXT: File 0, [[@LINE+1]]:3 -> [[@LINE+14]]:2 = #5 + switch(i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE+6]]:11 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+5]]:11 = #10 case 2: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+4]]:11 = (#10 + #11) i = 11; @@ -82,10 +82,23 @@ int main() { // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+35]]:2 = #0 return 0; } + // CHECK: pr44011 +int pr44011(int i) { // CHECK-NEXT: File 0, [[@LINE]]:20 -> {{.*}}:2 = #0 + switch (i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:14 -> [[@LINE+6]]:13 = 0 + + case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:13 = #2 + return 0; + + default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:13 = #3 + return 1; + } +} // A region for counter #1 is 
missing due to the missing return. + + // FIXME: End location for "case 1" shouldn't point at the end of the switch. // CHECK: fallthrough int fallthrough(int i) { // CHECK-NEXT: File 0, [[@LINE]]:24 -> [[@LINE+12]]:2 = #0 - switch(i) { + switch(i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE+9]]:10 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+8]]:10 = #2 i = 23; case 2: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:10 = (#2 + #3) @@ -101,7 +114,7 @@ int fallthrough(int i) { // CHECK-NEXT: File 0, [[@LINE]]:24 -> [[@LINE+12]]:2 = void abort(void) __attribute((noreturn)); // CHECK: noret int noret(int x) { // CHECK-NEXT: File 0, [[@LINE]]:18 -> [[@LINE+9]]:2 - switch (x) { + switch (x) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:14 -> [[@LINE+6]]:14 = 0 default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:12 abort(); case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:13 diff --git a/clang/test/CoverageMapping/switchmacro.c b/clang/test/CoverageMapping/switchmacro.c index f4c14f798f0be..fc0392fb91e53 100644 --- a/clang/test/CoverageMapping/switchmacro.c +++ b/clang/test/CoverageMapping/switchmacro.c @@ -4,7 +4,7 @@ // CHECK: foo int foo(int i) { // CHECK-NEXT: File 0, [[@LINE]]:16 -> {{[0-9]+}}:2 = #0 - switch (i) { + switch (i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:14 -> {{[0-9]+}}:11 = 0 default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> {{[0-9]+}}:11 = #2 if (i == 1) // CHECK-NEXT: File 0, [[@LINE]]:9 -> [[@LINE]]:15 = #2 return 0; // CHECK: File 0, [[@LINE]]:7 -> [[@LINE]]:15 = #3 diff --git a/clang/test/Driver/Inputs/aix_ppc_tree/dummy0.s b/clang/test/Driver/Inputs/aix_ppc_tree/dummy0.s new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/aix_ppc_tree/dummy1.s b/clang/test/Driver/Inputs/aix_ppc_tree/dummy1.s new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/aix_ppc_tree/dummy2.s b/clang/test/Driver/Inputs/aix_ppc_tree/dummy2.s new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/aix-as.c b/clang/test/Driver/aix-as.c new file mode 100644 index 0000000000000..4f67d1ba90b70 --- /dev/null +++ b/clang/test/Driver/aix-as.c @@ -0,0 +1,73 @@ +// General tests that as(1) invocations on AIX targets are sane. Note that we +// only test assembler functionalities in this suite. + +// Check powerpc-ibm-aix7.1.0.0, 32-bit. +// RUN: %clang -no-canonical-prefixes %s -### -c -o %t.o 2>&1 \ +// RUN: -target powerpc-ibm-aix7.1.0.0 \ +// RUN: | FileCheck --check-prefix=CHECK-AS32 %s +// CHECK-AS32-NOT: warning: +// CHECK-AS32: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" +// CHECK-AS32: "{{.*}}as{{(.exe)?}}" +// CHECK-AS32: "-a32" +// CHECK-AS32: "-u" +// CHECK-AS32: "-many" + +// Check powerpc64-ibm-aix7.1.0.0, 64-bit. +// RUN: %clang -no-canonical-prefixes %s -### -c -o %t.o 2>&1 \ +// RUN: -target powerpc64-ibm-aix7.1.0.0 \ +// RUN: | FileCheck --check-prefix=CHECK-AS64 %s +// CHECK-AS64-NOT: warning: +// CHECK-AS64: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" +// CHECK-AS64: "{{.*}}as{{(.exe)?}}" +// CHECK-AS64: "-a64" +// CHECK-AS64: "-u" +// CHECK-AS64: "-many" + + +// Check powerpc-ibm-aix7.1.0.0, 32-bit. -Xassembler option. 
+// RUN: %clang -no-canonical-prefixes %s -### -c -o %t.o 2>&1 \ +// RUN: -Xassembler -w \ +// RUN: -target powerpc-ibm-aix7.1.0.0 \ +// RUN: | FileCheck --check-prefix=CHECK-AS32-Xassembler %s +// CHECK-AS32-Xassembler-NOT: warning: +// CHECK-AS32-Xassembler: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" +// CHECK-AS32-Xassembler: "{{.*}}as{{(.exe)?}}" +// CHECK-AS32-Xassembler: "-a32" +// CHECK-AS32-Xassembler: "-u" +// CHECK-AS32-Xassembler: "-many" +// CHECK-AS32-Xassembler: "-w" + +// Check powerpc64-ibm-aix7.1.0.0, 64-bit. -Wa,<arg>,<arg> option. +// RUN: %clang -no-canonical-prefixes %s -### -c -o %t.o 2>&1 \ +// RUN: -Wa,-v,-w \ +// RUN: -target powerpc64-ibm-aix7.1.0.0 \ +// RUN: | FileCheck --check-prefix=CHECK-AS64-Wa %s +// CHECK-AS64-Wa-NOT: warning: +// CHECK-AS64-Wa: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" +// CHECK-AS64-Wa: "{{.*}}as{{(.exe)?}}" +// CHECK-AS64-Wa: "-a64" +// CHECK-AS64-Wa: "-u" +// CHECK-AS64-Wa: "-many" +// CHECK-AS64-Wa: "-v" +// CHECK-AS64-Wa: "-w" + +// Check powerpc-ibm-aix7.1.0.0, 32-bit. Multiple input files. +// RUN: %clang -no-canonical-prefixes -### -c \ +// RUN: %S/Inputs/aix_ppc_tree/dummy0.s \ +// RUN: %S/Inputs/aix_ppc_tree/dummy1.s \ +// RUN: %S/Inputs/aix_ppc_tree/dummy2.s 2>&1 \ +// RUN: -target powerpc-ibm-aix7.1.0.0 \ +// RUN: | FileCheck --check-prefix=CHECK-AS32-MultiInput %s +// CHECK-AS32-MultiInput-NOT: warning: +// CHECK-AS32-MultiInput: "{{.*}}as{{(.exe)?}}" +// CHECK-AS32-MultiInput: "-a32" +// CHECK-AS32-MultiInput: "-u" +// CHECK-AS32-MultiInput: "-many" +// CHECK-AS32-MultiInput: "{{.*}}as{{(.exe)?}}" +// CHECK-AS32-MultiInput: "-a32" +// CHECK-AS32-MultiInput: "-u" +// CHECK-AS32-MultiInput: "-many" +// CHECK-AS32-MultiInput: "{{.*}}as{{(.exe)?}}" +// CHECK-AS32-MultiInput: "-a32" +// CHECK-AS32-MultiInput: "-u" +// CHECK-AS32-MultiInput: "-many" diff --git a/clang/test/Driver/arm-reserved-reg-options.c b/clang/test/Driver/arm-reserved-reg-options.c deleted file mode 100644 index e97c717d7e7e7..0000000000000 --- a/clang/test/Driver/arm-reserved-reg-options.c +++ /dev/null @@ -1,35 +0,0 @@ -// ## FP ARM + Thumb -// RUN: %clang -target arm-arm-none-eabi -### -ffixed-r11 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R11 %s -// RUN: %clang -target arm-arm-none-eabi -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// RUN: %clang -target arm-arm-none-eabi -### -ffixed-r7 -mthumb -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R7 %s -// RUN: %clang -target arm-arm-none-eabi -### -ffixed-r11 -mthumb -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// RUN: %clang -target thumbv6m-none-eabi -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R7 %s -// RUN: %clang -target thumbv6m-none-eabi -### -ffixed-r11 -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// ## FP Darwin (R7) -// RUN: %clang -target armv6-apple-darwin9 -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R7 %s -// RUN: %clang -target armv6-apple-darwin9 -### -ffixed-r11 -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// RUN: %clang -target armv6-apple-ios3 -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R7 %s -// RUN: %clang -target armv6-apple-ios3 -### -ffixed-r11 -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// RUN: %clang -target armv7s-apple-darwin10 -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R7 %s -// RUN: %clang -target armv7s-apple-darwin10 -### -ffixed-r11 -c %s 2>&1 | FileCheck
-check-prefix=CHECK-NO-ERROR %s - -// ## FP Windows (R11) -// RUN: %clang -target armv7-windows -### -ffixed-r11 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R11 %s -// RUN: %clang -target armv7-windows -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// ## FRWPI (R9) -// RUN: %clang -target arm-arm-none-eabi -### -frwpi -ffixed-r9 -c %s 2>&1 | FileCheck -check-prefix=CHECK-RESERVED-FRWPI-CONFLICT %s -// RUN: %clang -target arm-arm-none-eabi -### -ffixed-r9 -c %s 2>&1 | FileCheck -check-prefix=CHECK-RESERVED-FRWPI-VALID %s -// RUN: %clang -target arm-arm-none-eabi -### -frwpi -c %s 2>&1 | FileCheck -check-prefix=CHECK-RESERVED-FRWPI-VALID %s - -// CHECK-ERROR-R11: error: '-ffixed-r11' has been specified but 'r11' is used as the frame pointer for this target -// CHECK-ERROR-R7: error: '-ffixed-r7' has been specified but 'r7' is used as the frame pointer for this target -// CHECK-NO-ERROR-NOT: may still be used as a frame pointer - -// CHECK-RESERVED-FRWPI-CONFLICT: option '-ffixed-r9' cannot be specified with '-frwpi' -// CHECK-RESERVED-FRWPI-VALID-NOT: option '-ffixed-r9' cannot be specified with '-frwpi' diff --git a/clang/test/Driver/check-time-trace.cpp b/clang/test/Driver/check-time-trace.cpp index 3c6a002ae8ab9..bff2c1984daa9 100644 --- a/clang/test/Driver/check-time-trace.cpp +++ b/clang/test/Driver/check-time-trace.cpp @@ -12,7 +12,7 @@ // CHECK-NEXT: "pid": // CHECK-NEXT: "tid": // CHECK-NEXT: "ts": -// CHECK: "name": "clang" +// CHECK: "name": "clang{{.*}}" // CHECK: "name": "process_name" template <typename T> diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 17feaab26ab79..fef9cbfb115e0 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -198,6 +198,22 @@ // CHECK-EXTENDED-IDENTIFIERS-NOT: "-fextended-identifiers" // CHECK-NO-EXTENDED-IDENTIFIERS: error: unsupported option '-fno-extended-identifiers' +// RUN: %clang -### -S -frounding-math %s 2>&1 | FileCheck -check-prefix=CHECK-ROUNDING-MATH %s +// CHECK-ROUNDING-MATH: "-cc1" +// CHECK-ROUNDING-MATH: "-frounding-math" +// CHECK-ROUNDING-MATH-NOT: "-fno-rounding-math" +// RUN: %clang -### -S %s 2>&1 | FileCheck -check-prefix=CHECK-ROUNDING-MATH-NOT %s +// RUN: %clang -### -S -ffp-model=imprecise %s 2>&1 | FileCheck -check-prefix=CHECK-FPMODEL %s +// CHECK-FPMODEL: unsupported argument 'imprecise' to option 'ffp-model=' +// RUN: %clang -### -S -ffp-model=precise %s 2>&1 | FileCheck -check-prefix=IGNORE %s +// RUN: %clang -### -S -ffp-model=strict %s 2>&1 | FileCheck -check-prefix=IGNORE %s +// RUN: %clang -### -S -ffp-model=fast %s 2>&1 | FileCheck -check-prefix=IGNORE %s +// RUN: %clang -### -S -ffp-exception-behavior=trap %s 2>&1 | FileCheck -check-prefix=CHECK-FPEB %s +// CHECK-FPEB: unsupported argument 'trap' to option 'ffp-exception-behavior=' +// RUN: %clang -### -S -ffp-exception-behavior=maytrap %s 2>&1 | FileCheck -check-prefix=IGNORE %s +// RUN: %clang -### -S -ffp-exception-behavior=ignore %s 2>&1 | FileCheck -check-prefix=IGNORE %s +// RUN: %clang -### -S -ffp-exception-behavior=strict %s 2>&1 | FileCheck -check-prefix=IGNORE %s + // RUN: %clang -### -S -fno-pascal-strings -mpascal-strings %s 2>&1 | FileCheck -check-prefix=CHECK-M-PASCAL-STRINGS %s // CHECK-M-PASCAL-STRINGS: "-fpascal-strings" @@ -320,7 +336,6 @@ // RUN: -fprefetch-loop-arrays \ // RUN: -fprofile-correction \ // RUN: -fprofile-values \ -// RUN: -frounding-math \ // RUN: -fschedule-insns \ // RUN: -fsignaling-nans \ // RUN: -fstrength-reduce \ @@ -385,7 +400,6 @@
// CHECK-WARNING-DAG: optimization flag '-fprefetch-loop-arrays' is not supported // CHECK-WARNING-DAG: optimization flag '-fprofile-correction' is not supported // CHECK-WARNING-DAG: optimization flag '-fprofile-values' is not supported -// CHECK-WARNING-DAG: optimization flag '-frounding-math' is not supported // CHECK-WARNING-DAG: optimization flag '-fschedule-insns' is not supported // CHECK-WARNING-DAG: optimization flag '-fsignaling-nans' is not supported // CHECK-WARNING-DAG: optimization flag '-fstrength-reduce' is not supported diff --git a/clang/test/Driver/darwin-opt-record.c b/clang/test/Driver/darwin-opt-record.c index ca0fad7ee16d3..7c674819663a5 100644 --- a/clang/test/Driver/darwin-opt-record.c +++ b/clang/test/Driver/darwin-opt-record.c @@ -1,6 +1,6 @@ // REQUIRES: system-darwin -// RUN: %clang -### -S -o FOO -fsave-optimization-record -arch x86_64 -arch x86_64h %s 2>&1 | FileCheck %s --check-prefix=CHECK-MULTIPLE-ARCH +// RUN: %clang -target x86_64-apple-darwin10 -### -c -o FOO -fsave-optimization-record -arch x86_64 -arch x86_64h %s 2>&1 | FileCheck %s --check-prefix=CHECK-MULTIPLE-ARCH // // CHECK-MULTIPLE-ARCH: "-cc1" // CHECK-MULTIPLE-ARCH: "-opt-record-file" "FOO-x86_64.opt.yaml" diff --git a/clang/test/Driver/debug-prefix-map.S b/clang/test/Driver/debug-prefix-map.S index 2ba66be0edfce..7d12a17479726 100644 --- a/clang/test/Driver/debug-prefix-map.S +++ b/clang/test/Driver/debug-prefix-map.S @@ -1,4 +1,5 @@ // RUN: %clang -### -g -fdebug-prefix-map=old=new %s 2>&1 | FileCheck %s +// RUN: %clang -### -g -ffile-prefix-map=old=new %s 2>&1 | FileCheck %s // CHECK: cc1as // CHECK-SAME: -fdebug-prefix-map=old=new diff --git a/clang/test/Driver/debug-prefix-map.c b/clang/test/Driver/debug-prefix-map.c index b4f3859f982ab..f2c87cb7c11c9 100644 --- a/clang/test/Driver/debug-prefix-map.c +++ b/clang/test/Driver/debug-prefix-map.c @@ -1,9 +1,28 @@ -// RUN: %clang -### -fdebug-prefix-map=old %s 2>&1 | FileCheck %s -check-prefix CHECK-INVALID -// RUN: %clang -### -fdebug-prefix-map=old=new %s 2>&1 | FileCheck %s -check-prefix CHECK-SIMPLE -// RUN: %clang -### -fdebug-prefix-map=old=n=ew %s 2>&1 | FileCheck %s -check-prefix CHECK-COMPLEX -// RUN: %clang -### -fdebug-prefix-map=old= %s 2>&1 | FileCheck %s -check-prefix CHECK-EMPTY - -// CHECK-INVALID: error: invalid argument 'old' to -fdebug-prefix-map -// CHECK-SIMPLE: fdebug-prefix-map=old=new -// CHECK-COMPLEX: fdebug-prefix-map=old=n=ew -// CHECK-EMPTY: fdebug-prefix-map=old= +// RUN: %clang -### -fdebug-prefix-map=old %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-INVALID +// RUN: %clang -### -fmacro-prefix-map=old %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-INVALID +// RUN: %clang -### -ffile-prefix-map=old %s 2>&1 | FileCheck %s -check-prefix CHECK-FILE-INVALID + +// RUN: %clang -### -fdebug-prefix-map=old=new %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-SIMPLE +// RUN: %clang -### -fmacro-prefix-map=old=new %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-SIMPLE +// RUN: %clang -### -ffile-prefix-map=old=new %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-SIMPLE +// RUN: %clang -### -ffile-prefix-map=old=new %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-SIMPLE + +// RUN: %clang -### -fdebug-prefix-map=old=n=ew %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-COMPLEX +// RUN: %clang -### -fmacro-prefix-map=old=n=ew %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-COMPLEX +// RUN: %clang -### -ffile-prefix-map=old=n=ew %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-COMPLEX +// RUN: %clang -### 
-ffile-prefix-map=old=n=ew %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-COMPLEX + +// RUN: %clang -### -fdebug-prefix-map=old= %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-EMPTY +// RUN: %clang -### -fmacro-prefix-map=old= %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-EMPTY +// RUN: %clang -### -ffile-prefix-map=old= %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-EMPTY +// RUN: %clang -### -ffile-prefix-map=old= %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-EMPTY + +// CHECK-DEBUG-INVALID: error: invalid argument 'old' to -fdebug-prefix-map +// CHECK-MACRO-INVALID: error: invalid argument 'old' to -fmacro-prefix-map +// CHECK-FILE-INVALID: error: invalid argument 'old' to -ffile-prefix-map +// CHECK-DEBUG-SIMPLE: fdebug-prefix-map=old=new +// CHECK-MACRO-SIMPLE: fmacro-prefix-map=old=new +// CHECK-DEBUG-COMPLEX: fdebug-prefix-map=old=n=ew +// CHECK-MACRO-COMPLEX: fmacro-prefix-map=old=n=ew +// CHECK-DEBUG-EMPTY: fdebug-prefix-map=old= +// CHECK-MACRO-EMPTY: fmacro-prefix-map=old= diff --git a/clang/test/Driver/fast-math.c b/clang/test/Driver/fast-math.c index 916384216d8c5..da47de260dc90 100644 --- a/clang/test/Driver/fast-math.c +++ b/clang/test/Driver/fast-math.c @@ -170,11 +170,11 @@ // RUN: %clang -### -fno-fast-math -ffast-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FAST-MATH %s // RUN: %clang -### -funsafe-math-optimizations -ffinite-math-only \ -// RUN: -fno-math-errno -ffp-contract=fast -c %s 2>&1 \ +// RUN: -fno-math-errno -ffp-contract=fast -fno-rounding-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FAST-MATH %s // RUN: %clang -### -fno-honor-infinities -fno-honor-nans -fno-math-errno \ // RUN: -fassociative-math -freciprocal-math -fno-signed-zeros \ -// RUN: -fno-trapping-math -ffp-contract=fast -c %s 2>&1 \ +// RUN: -fno-trapping-math -ffp-contract=fast -fno-rounding-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FAST-MATH %s // CHECK-FAST-MATH: "-cc1" // CHECK-FAST-MATH: "-ffast-math" diff --git a/clang/test/Driver/fp-model.c b/clang/test/Driver/fp-model.c new file mode 100644 index 0000000000000..a3984acef62b2 --- /dev/null +++ b/clang/test/Driver/fp-model.c @@ -0,0 +1,137 @@ +// Test that incompatible combinations of -ffp-model= options +// and other floating point options get a warning diagnostic. 
+// +// REQUIRES: clang-driver + +// RUN: %clang -### -ffp-model=fast -ffp-contract=off -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN %s +// WARN: warning: overriding '-ffp-model=fast' option with '-ffp-contract=off' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=fast -ffp-contract=on -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN1 %s +// WARN1: warning: overriding '-ffp-model=fast' option with '-ffp-contract=on' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -fassociative-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN2 %s +// WARN2: warning: overriding '-ffp-model=strict' option with '-fassociative-math' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -ffast-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN3 %s +// WARN3: warning: overriding '-ffp-model=strict' option with '-ffast-math' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -ffinite-math-only -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN4 %s +// WARN4: warning: overriding '-ffp-model=strict' option with '-ffinite-math-only' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -ffp-contract=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN5 %s +// WARN5: warning: overriding '-ffp-model=strict' option with '-ffp-contract=fast' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -ffp-contract=off -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN6 %s +// WARN6: warning: overriding '-ffp-model=strict' option with '-ffp-contract=off' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -ffp-contract=on -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN7 %s +// WARN7: warning: overriding '-ffp-model=strict' option with '-ffp-contract=on' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -fno-honor-infinities -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN8 %s +// WARN8: warning: overriding '-ffp-model=strict' option with '-fno-honor-infinities' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -fno-honor-nans -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARN9 %s +// WARN9: warning: overriding '-ffp-model=strict' option with '-fno-honor-nans' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -fno-rounding-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARNa %s +// WARNa: warning: overriding '-ffp-model=strict' option with '-fno-rounding-math' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -fno-signed-zeros -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARNb %s +// WARNb: warning: overriding '-ffp-model=strict' option with '-fno-signed-zeros' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -fno-trapping-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARNc %s +// WARNc: warning: overriding '-ffp-model=strict' option with '-fno-trapping-math' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -freciprocal-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARNd %s +// WARNd: warning: overriding '-ffp-model=strict' option with '-freciprocal-math' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -funsafe-math-optimizations -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARNe %s +// WARNe: warning: overriding '-ffp-model=strict' option with '-funsafe-math-optimizations' [-Woverriding-t-option] + +// RUN: %clang -### -ffp-model=strict -Ofast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=WARNf %s +// WARNf: warning: overriding 
'-ffp-model=strict' option with '-Ofast' [-Woverriding-t-option] + +// RUN: %clang -### -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOROUND %s +// CHECK-NOROUND: "-cc1" +// CHECK-NOROUND: "-fno-rounding-math" + +// RUN: %clang -### -frounding-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-ROUND --implicit-check-not ffp-exception-behavior=strict %s +// CHECK-ROUND: "-cc1" +// CHECK-ROUND: "-frounding-math" + +// RUN: %clang -### -ftrapping-math -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-TRAP %s +// CHECK-TRAP: "-cc1" +// CHECK-TRAP: "-ftrapping-math" +// CHECK-TRAP: "-ffp-exception-behavior=strict" + +// RUN: %clang -### -nostdinc -ffp-model=fast -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPM-FAST %s +// CHECK-FPM-FAST: "-cc1" +// CHECK-FPM-FAST: "-menable-no-infs" +// CHECK-FPM-FAST: "-menable-no-nans" +// CHECK-FPM-FAST: "-menable-unsafe-fp-math" +// CHECK-FPM-FAST: "-fno-signed-zeros" +// CHECK-FPM-FAST: "-mreassociate" +// CHECK-FPM-FAST: "-freciprocal-math" +// CHECK-FPM-FAST: "-ffp-contract=fast" +// CHECK-FPM-FAST: "-fno-rounding-math" +// CHECK-FPM-FAST: "-ffast-math" +// CHECK-FPM-FAST: "-ffinite-math-only" + +// RUN: %clang -### -nostdinc -ffp-model=precise -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPM-PRECISE %s +// CHECK-FPM-PRECISE: "-cc1" +// CHECK-FPM-PRECISE: "-ffp-contract=fast" +// CHECK-FPM-PRECISE: "-fno-rounding-math" + +// RUN: %clang -### -nostdinc -ffp-model=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FPM-STRICT %s +// CHECK-FPM-STRICT: "-cc1" +// CHECK-FPM-STRICT: "-ftrapping-math" +// CHECK-FPM-STRICT: "-frounding-math" +// CHECK-FPM-STRICT: "-ffp-exception-behavior=strict" + +// RUN: %clang -### -nostdinc -ftrapping-math -ffp-exception-behavior=ignore -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-TRAP-IGNORE %s +// CHECK-TRAP-IGNORE: "-cc1" +// CHECK-TRAP-IGNORE: "-fno-rounding-math" +// CHECK-TRAP-IGNORE: "-ffp-exception-behavior=ignore" + + +// RUN: %clang -### -nostdinc -ffp-exception-behavior=strict -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FEB-STRICT %s +// CHECK-FEB-STRICT: "-cc1" +// CHECK-FEB-STRICT: "-fno-rounding-math" +// CHECK-FEB-STRICT: "-ffp-exception-behavior=strict" + +// RUN: %clang -### -nostdinc -ffp-exception-behavior=maytrap -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FEB-MAYTRAP %s +// CHECK-FEB-MAYTRAP: "-cc1" +// CHECK-FEB-MAYTRAP: "-fno-rounding-math" +// CHECK-FEB-MAYTRAP: "-ffp-exception-behavior=maytrap" + +// RUN: %clang -### -nostdinc -ffp-exception-behavior=ignore -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-FEB-IGNORE %s +// CHECK-FEB-IGNORE: "-cc1" +// CHECK-FEB-IGNORE: "-fno-rounding-math" +// CHECK-FEB-IGNORE: "-ffp-exception-behavior=ignore" + diff --git a/clang/test/Driver/fuse-ld.c b/clang/test/Driver/fuse-ld.c index 4b2ec7b1bb2ae..13e709ccfdfa4 100644 --- a/clang/test/Driver/fuse-ld.c +++ b/clang/test/Driver/fuse-ld.c @@ -79,13 +79,13 @@ // RUN: %clang %s -### -fuse-ld=lld \ // RUN: -target i686-unknown-windows-msvc 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-WINDOWS-MSVC-LLD -// CHECK-WINDOWS-MSVC-LLD: "{{.*}}lld-link" +// CHECK-WINDOWS-MSVC-LLD: "{{.*}}lld-link{{\.exe"|"}} // CHECK-WINDOWS-MSVC-LLD-SAME: "-out:{{.*}}" // RUN: %clang %s -### -fuse-ld=lld-link \ // RUN: -target i686-unknown-windows-msvc 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-WINDOWS-MSVC-LLD-LINK -// CHECK-WINDOWS-MSVC-LLD-LINK: "{{.*}}lld-link" +// CHECK-WINDOWS-MSVC-LLD-LINK: "{{.*}}lld-link{{\.exe"|"}} // 
CHECK-WINDOWS-MSVC-LLD-LINK-SAME: "-out:{{.*}}" // RUN: %clang %s -### -fuse-ld=bfd \ diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index 59c1927330c03..14401a947e6f6 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -22,7 +22,6 @@ // COM: {{"[^"]*clang[^"]*"}} // COM-SAME: "-mlink-builtin-bitcode" "{{.*}}hip.amdgcn.bc" -// COM-SAME: "-mlink-builtin-bitcode" "{{.*}}opencl.amdgcn.bc" // COM-SAME: "-mlink-builtin-bitcode" "{{.*}}ocml.amdgcn.bc" // COM-SAME: "-mlink-builtin-bitcode" "{{.*}}ockl.amdgcn.bc" // FLUSHD-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_on.amdgcn.bc" diff --git a/clang/test/Index/index-module-with-vfs.m b/clang/test/Index/index-module-with-vfs.m index 46fa68dfa1308..06944d372d49b 100644 --- a/clang/test/Index/index-module-with-vfs.m +++ b/clang/test/Index/index-module-with-vfs.m @@ -6,7 +6,7 @@ void foo() { } // RUN: rm -rf %t.cache -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml // RUN: c-index-test -index-file %s -fmodules-cache-path=%t.cache -fmodules -F %t -I %t \ // RUN: -ivfsoverlay %t.yaml -Xclang -fdisable-module-hash | FileCheck %s diff --git a/clang/test/Index/pragma-diag-reparse.c b/clang/test/Index/pragma-diag-reparse.c index 71d0618d70928..aa1413cda089a 100644 --- a/clang/test/Index/pragma-diag-reparse.c +++ b/clang/test/Index/pragma-diag-reparse.c @@ -11,6 +11,7 @@ int main (int argc, const char * argv[]) return x; } +#pragma clang diagnostic ignored "-Wmisleading-indentation" void foo() { int b=0; while (b==b); } // RUN: env CINDEXTEST_EDITING=1 CINDEXTEST_FAILONERROR=1 c-index-test -test-load-source-reparse 5 local \ diff --git a/clang/test/InterfaceStubs/XlinkerInputArgs.cpp b/clang/test/InterfaceStubs/XlinkerInputArgs.cpp new file mode 100644 index 0000000000000..cb4ef8aca952d --- /dev/null +++ b/clang/test/InterfaceStubs/XlinkerInputArgs.cpp @@ -0,0 +1,3 @@ +// RUN: %clang -### -Xlinker -Bsymbolic -emit-interface-stubs 2>&1 | FileCheck %s +// CHECK: Bsymbolic +// CHECK-NOT: Bsymbolic diff --git a/clang/test/InterfaceStubs/constructor-using-shadow.cpp b/clang/test/InterfaceStubs/constructor-using-shadow.cpp new file mode 100644 index 0000000000000..d4b85ac73e56d --- /dev/null +++ b/clang/test/InterfaceStubs/constructor-using-shadow.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s + +// CHECK: --- !experimental-ifs-v1 +// CHECK-NEXT: IfsVersion: 1.0 +// CHECK-NEXT: Triple: +// CHECK-NEXT: ObjectFileFormat: ELF +// CHECK-NEXT: Symbols: +// CHECK-NEXT: ... + + // ConstructorUsingShadowDecl +struct Base { Base(int); }; +struct Derived : public Base { using Base::Base; }; diff --git a/clang/test/InterfaceStubs/cxxdeduction-guide.cpp b/clang/test/InterfaceStubs/cxxdeduction-guide.cpp new file mode 100644 index 0000000000000..f09b9d929ca3e --- /dev/null +++ b/clang/test/InterfaceStubs/cxxdeduction-guide.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -o - -emit-interface-stubs -std=c++17 %s | FileCheck %s + +// CHECK: --- !experimental-ifs-v1 +// CHECK-NEXT: IfsVersion: 1.0 +// CHECK-NEXT: Triple: +// CHECK-NEXT: ObjectFileFormat: ELF +// CHECK-NEXT: Symbols: +// CHECK-NEXT: ... 
+ +// CXXDeductionGuideDecl +template <typename T> struct A { A(); A(T); }; +A() -> A<int>; diff --git a/clang/test/InterfaceStubs/namespace-alias.cpp b/clang/test/InterfaceStubs/namespace-alias.cpp new file mode 100644 index 0000000000000..6a7f27c9b7b0a --- /dev/null +++ b/clang/test/InterfaceStubs/namespace-alias.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s + +// CHECK: --- !experimental-ifs-v1 +// CHECK-NEXT: IfsVersion: 1.0 +// CHECK-NEXT: Triple: +// CHECK-NEXT: ObjectFileFormat: ELF +// CHECK-NEXT: Symbols: +// CHECK-NEXT: ... + +// NamespaceAliasDecl +namespace NS { } +namespace B = NS; diff --git a/clang/test/InterfaceStubs/unresolved-using-typename.cpp b/clang/test/InterfaceStubs/unresolved-using-typename.cpp new file mode 100644 index 0000000000000..e6afc781412a1 --- /dev/null +++ b/clang/test/InterfaceStubs/unresolved-using-typename.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s + +// CHECK: --- !experimental-ifs-v1 +// CHECK-NEXT: IfsVersion: 1.0 +// CHECK-NEXT: Triple: +// CHECK-NEXT: ObjectFileFormat: ELF +// CHECK-NEXT: Symbols: +// CHECK-NEXT: ... + +// UnresolvedUsingTypenameDecl +template <typename T> class C1 { using ReprType = unsigned; }; +template <typename T> class C2 : public C1<T> { using typename C1<T>::Repr; }; diff --git a/clang/test/Misc/warning-wall.c b/clang/test/Misc/warning-wall.c index fadcceefe297e..2b27b67eafa17 100644 --- a/clang/test/Misc/warning-wall.c +++ b/clang/test/Misc/warning-wall.c @@ -90,6 +90,7 @@ CHECK-NEXT: -Wparentheses-equality CHECK-NEXT: -Wdangling-else CHECK-NEXT: -Wswitch CHECK-NEXT: -Wswitch-bool +CHECK-NEXT: -Wmisleading-indentation CHECK-NOT:-W diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h new file mode 100644 index 0000000000000..8adab29eafc76 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h @@ -0,0 +1,2 @@ + +constexpr const int& LETemp = 0; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h new file mode 100644 index 0000000000000..2bd1b096d6073 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h @@ -0,0 +1,4 @@ + +#include "a.h" + +constexpr const int* PtrTemp1 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h new file mode 100644 index 0000000000000..b023eebca49c2 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h @@ -0,0 +1,4 @@ + +#include "a.h" + +constexpr const int* PtrTemp2 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap new file mode 100644 index 0000000000000..1339d627a44af --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap @@ -0,0 +1,14 @@ +module "a" { + export * + header "a.h" +} + +module "b" { + export * + header "b.h" +} + +module "c" { + export * + header "c.h" +} diff --git a/clang/test/Modules/crash-vfs-ivfsoverlay.m b/clang/test/Modules/crash-vfs-ivfsoverlay.m index 00992aa19fad6..d2d2ccbd2546b 100644 --- a/clang/test/Modules/crash-vfs-ivfsoverlay.m +++ b/clang/test/Modules/crash-vfs-ivfsoverlay.m @@ -3,7 +3,7 @@ // RUN: rm -rf %t // RUN: mkdir -p %t/m // RUN: cp %S/../VFS/Inputs/actual_module2.map %t/actual_module2.map
-// RUN: sed -e "s@INPUT_DIR@%/t@g" -e "s@OUT_DIR@%/t/example@g" \ +// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" -e "s@OUT_DIR@%{/t:regex_replacement}/example@g" \ // RUN: %S/../VFS/Inputs/vfsoverlay2.yaml > %t/srcvfs.yaml // RUN: env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \ diff --git a/clang/test/Modules/double-quotes.m b/clang/test/Modules/double-quotes.m index 4ce712ccc6c54..99187fc26654e 100644 --- a/clang/test/Modules/double-quotes.m +++ b/clang/test/Modules/double-quotes.m @@ -4,7 +4,7 @@ // RUN: %hmaptool write %S/Inputs/double-quotes/a.hmap.json %t/a.hmap // RUN: %hmaptool write %S/Inputs/double-quotes/x.hmap.json %t/x.hmap -// RUN: sed -e "s@TEST_DIR@%/S/Inputs/double-quotes@g" \ +// RUN: sed -e "s@TEST_DIR@%{/S:regex_replacement}/Inputs/double-quotes@g" \ // RUN: %S/Inputs/double-quotes/z.yaml > %t/z.yaml // The output with and without modules should be the same diff --git a/clang/test/Modules/framework-public-includes-private.m b/clang/test/Modules/framework-public-includes-private.m index 0f1e3a242a158..37c43e9a6390b 100644 --- a/clang/test/Modules/framework-public-includes-private.m +++ b/clang/test/Modules/framework-public-includes-private.m @@ -4,7 +4,7 @@ // RUN: %hmaptool write %S/Inputs/framework-public-includes-private/a.hmap.json %t/a.hmap // RUN: %hmaptool write %S/Inputs/framework-public-includes-private/z.hmap.json %t/z.hmap -// RUN: sed -e "s@TEST_DIR@%/S/Inputs/framework-public-includes-private@g" \ +// RUN: sed -e "s@TEST_DIR@%{/S:regex_replacement}/Inputs/framework-public-includes-private@g" \ // RUN: %S/Inputs/framework-public-includes-private/z.yaml > %t/z.yaml // The output with and without modules should be the same, without modules first. diff --git a/clang/test/Modules/merge-lifetime-extended-temporary.cpp b/clang/test/Modules/merge-lifetime-extended-temporary.cpp new file mode 100644 index 0000000000000..36db948b2c4ef --- /dev/null +++ b/clang/test/Modules/merge-lifetime-extended-temporary.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=1 +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=2 + +// expected-no-diagnostics +#if ORDER == 1 +#include "c.h" +#include "b.h" +#else +#include "b.h" +#include "c.h" +#endif + +static_assert(PtrTemp1 == &LETemp, ""); +static_assert(PtrTemp1 == PtrTemp2, ""); diff --git a/clang/test/OpenMP/declare_reduction_codegen.cpp b/clang/test/OpenMP/declare_reduction_codegen.cpp index 95c607d83246c..1f6fa2bebea36 100644 --- a/clang/test/OpenMP/declare_reduction_codegen.cpp +++ b/clang/test/OpenMP/declare_reduction_codegen.cpp @@ -85,9 +85,8 @@ SSS<int> d; // CHECK-NEXT: ret void // CHECK-NEXT: } -// CHECK: define {{.*}}void [[INIT:@[^(]+]]([[SSS_INT]]* -// CHECK-LOAD: define {{.*}}void [[INIT:@[^(]+]]([[SSS_INT]]* -void init(SSS<int> &lhs, SSS<int> &rhs) {} +template <typename T> +void init(T &lhs, T &rhs) {} #pragma omp declare reduction(fun : SSS < int > : omp_out = omp_in) initializer(init(omp_priv, omp_orig)) // CHECK: define internal {{.*}}void @{{[^(]+}}([[SSS_INT]]* noalias %0, [[SSS_INT]]* noalias %1) @@ -95,7 +94,7 @@ void init(SSS<int> &lhs, SSS<int> &rhs) {} // CHECK-NEXT: ret void // CHECK-NEXT: } // CHECK: define internal {{.*}}void @{{[^(]+}}([[SSS_INT]]* noalias %0, [[SSS_INT]]* noalias %1) -// CHECK: call {{.*}}void [[INIT]]( +// CHECK: call {{.*}}void @_Z4initI3SSSIiEEvRT_S3_( //
 // CHECK-NEXT: }
@@ -104,10 +103,13 @@ void init(SSS<int> &lhs, SSS<int> &rhs) {}
 // CHECK-LOAD-NEXT: ret void
 // CHECK-LOAD-NEXT: }
 // CHECK-LOAD: define internal {{.*}}void @{{[^(]+}}([[SSS_INT]]* noalias %0, [[SSS_INT]]* noalias %1)
-// CHECK-LOAD: call {{.*}}void [[INIT]](
+// CHECK-LOAD: call {{.*}}void @_Z4initI3SSSIiEEvRT_S3_(
 // CHECK-LOAD-NEXT: ret void
 // CHECK-LOAD-NEXT: }
 
+// CHECK: define {{.*}}void @_Z4initI3SSSIiEEvRT_S3_(%struct.SSS* {{.+}}, %struct.SSS* {{.+}})
+// CHECK-LOAD: define {{.*}}void @_Z4initI3SSSIiEEvRT_S3_(%struct.SSS* {{.+}}, %struct.SSS* {{.+}})
+
 template <typename T>
 T foo(T a) {
 #pragma omp declare reduction(fun : T : omp_out += omp_in) initializer(omp_priv = 15 * omp_orig)
diff --git a/clang/test/OpenMP/declare_reduction_codegen_in_templates.cpp b/clang/test/OpenMP/declare_reduction_codegen_in_templates.cpp
new file mode 100644
index 0000000000000..0409c02191445
--- /dev/null
+++ b/clang/test/OpenMP/declare_reduction_codegen_in_templates.cpp
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++17 -emit-llvm %s -triple x86_64-linux -fexceptions -fcxx-exceptions -o - -femit-all-decls -disable-llvm-passes | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++17 -triple x86_64-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s -femit-all-decls -disable-llvm-passes
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-linux -fexceptions -fcxx-exceptions -std=c++17 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls -disable-llvm-passes | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++17 -emit-llvm %s -triple x86_64-linux -fexceptions -fcxx-exceptions -o - -femit-all-decls -disable-llvm-passes | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++17 -triple x86_64-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s -femit-all-decls -disable-llvm-passes
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-linux -fexceptions -fcxx-exceptions -std=c++17 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls -disable-llvm-passes | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[STD_D:%.+]]*)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), [[STD_D]]* %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, [[STD_D]]* {{.+}})
+// CHECK: call i32 @__kmpc_reduce_nowait(%struct.ident_t*
+
+#ifndef HEADER
+#define HEADER
+
+typedef long unsigned a;
+namespace std {
+template <class> class initializer_list {
+  const int *b;
+  a c;
+};
+template <typename> class d {};
+template <typename e> class f {
+public:
+  f(initializer_list<e>);
+};
+} // namespace std
+template <class g, class h> void foo(g, h) {
+  std::d<g> i;
+#pragma omp declare reduction(j : std::d<g> : []{}())
+#pragma omp parallel reduction(j : i)
+  ;
+}
+void k() {
+  std::f<int> l{};
+  std::f<std::f<int>> m{2};
+  foo(l, m);
+}
+
+#endif // HEADER
diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp
index bacb2c6b06eef..498a0590b51de 100644
--- a/clang/test/OpenMP/parallel_codegen.cpp
+++ b/clang/test/OpenMP/parallel_codegen.cpp
@@ -109,7 +109,7 @@ int main (int argc, char **argv) {
 // CHECK-DEBUG-NEXT: ret i32 0
 // CHECK-DEBUG-NEXT: }
 
-// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i{{64|32}} %{{.+}})
+// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i{{64|32}}{{.*}} %{{.+}})
 // CHECK: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]],
 // CHECK: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]]
 // CHECK: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
diff --git a/clang/test/OpenMP/parallel_for_simd_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_codegen.cpp
index 9585bf293695c..01f2b4c42a243 100644
--- a/clang/test/OpenMP/parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_codegen.cpp
@@ -1,14 +1,24 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=OMP45 --check-prefix=CHECK
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=OMP50 --check-prefix=CHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=50 -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=50 -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // expected-no-diagnostics +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} #ifndef HEADER #define HEADER @@ -75,7 +85,7 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_VAR:%[^,]+]] // CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]] -// CHECK: call void @__kmpc_dispatch_init_4(%struct.ident_t* {{.+}}, i32 %{{.+}}, i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK: call void @__kmpc_dispatch_init_4(%struct.ident_t* {{.+}}, i32 %{{.+}}, i32 {{35|1073741859}}, i32 0, i32 8, i32 1, i32 1) // CHECK: [[NEXT:%.+]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* {{.+}}, i32 %{{.+}}, i32* %{{.+}}, i32* [[LB:%.+]], i32* [[UB:%.+]], i32* %{{.+}}) // CHECK: [[COND:%.+]] = icmp ne i32 [[NEXT]], 0 // CHECK: br i1 [[COND]], label %[[CONT:.+]], label %[[END:.+]] @@ -386,6 +396,51 @@ void inst_templ1() { templ1 (a, z); } +// OMP50: call void @__kmpc_for_static_init_8(%struct.ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 1) +// OMP50: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP50: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15 +// OMP50: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]] +// OMP50: [[TRUE]]: +// OMP50: br label %[[SWITCH:[^,]+]] +// OMP50: [[FALSE]]: +// OMP50: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP50: br label %[[SWITCH]] +// OMP50: [[SWITCH]]: +// OMP50: [[UP:%.+]] = phi i64 [ 15, %[[TRUE]] ], [ [[UB_VAL]], %[[FALSE]] ] +// OMP50: store i64 [[UP]], i64* [[UB]], +// OMP50: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// OMP50: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]], + +// ... 
+// OMP50: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] +// OMP50-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] +// OMP50-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] +// OMP50: [[T1_BODY]]: +// Loop counters i and j updates: +// OMP50: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 +// OMP50-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 +// OMP50-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] +// OMP50-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 +// OMP50-NEXT: store i32 [[I_2]], i32* +// OMP50: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[DIV_1:%.+]] = sdiv i64 [[IV2_1]], 4 +// OMP50-NEXT: [[MUL_1:%.+]] = mul nsw i64 [[DIV_1]], 4 +// OMP50-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[MUL_1]] +// OMP50-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 +// OMP50-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] +// OMP50-NEXT: store i64 [[J_2_ADD0]], i64* +// simd.for.inc: +// OMP50: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 +// OMP50-NEXT: store i64 [[INC]], i64* +// OMP50-NEXT: br label {{%.+}} +// OMP50: [[T1_END]]: +// OMP50: call void @__kmpc_for_static_fini(%struct.ident_t* {{.+}}, i32 %{{.+}}) +// OMP50: ret void +// typedef int MyIdx; @@ -674,51 +729,77 @@ void widened(float *a, float *b, float *c, float *d) { // CHECK: ret void } -// CHECK: call void @__kmpc_for_static_init_8(%struct.ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 1) -// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], -// CHECK: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15 -// CHECK: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]] -// CHECK: [[TRUE]]: -// CHECK: br label %[[SWITCH:[^,]+]] -// CHECK: [[FALSE]]: -// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], -// CHECK: br label %[[SWITCH]] -// CHECK: [[SWITCH]]: -// CHECK: [[UP:%.+]] = phi i64 [ 15, %[[TRUE]] ], [ [[UB_VAL]], %[[FALSE]] ] -// CHECK: store i64 [[UP]], i64* [[UB]], -// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], -// CHECK: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]], +// CHECK-LABEL: if_clause +void if_clause(int a) { + #pragma omp parallel for simd if(a) schedule(static, 1) +for (int i = 0; i < 10; ++i); +} +// CHECK: call void @__kmpc_for_static_init_4( +// OMP50: [[COND:%.+]] = trunc i8 %{{.+}} to i1 +// OMP50: br i1 [[COND]], label {{%?}}[[THEN:.+]], label {{%?}}[[ELSE:.+]] + +// OMP50: [[THEN]]: +// OMP45: br label {{.+}}, !llvm.loop ![[VECT:.+]] +// OMP50: br label {{.+}}, !llvm.loop ![[VECT:.+]] +// OMP50: [[ELSE]]: +// OMP50: br label {{.+}}, !llvm.loop ![[NOVECT:.+]] +// CHECK: call void @__kmpc_for_static_fini( + +// OMP45: call void @__kmpc_for_static_init_8(%struct.ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 1) +// OMP45: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP45: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15 +// OMP45: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]] +// OMP45: [[TRUE]]: +// OMP45: br label %[[SWITCH:[^,]+]] +// OMP45: [[FALSE]]: +// OMP45: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP45: br label %[[SWITCH]] +// OMP45: [[SWITCH]]: +// OMP45: [[UP:%.+]] = phi i64 [ 15, %[[TRUE]] ], [ [[UB_VAL]], %[[FALSE]] ] +// OMP45: store i64 [[UP]], i64* [[UB]], +// OMP45: 
[[LB_VAL:%.+]] = load i64, i64* [[LB]], +// OMP45: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]], // ... -// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] -// CHECK-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] -// CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] -// CHECK: [[T1_BODY]]: +// OMP45: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] +// OMP45-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] +// OMP45-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] +// OMP45: [[T1_BODY]]: // Loop counters i and j updates: -// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 -// CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 -// CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] -// CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 -// CHECK-NEXT: store i32 [[I_2]], i32* -// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[DIV_1:%.+]] = sdiv i64 [[IV2_1]], 4 -// CHECK-NEXT: [[MUL_1:%.+]] = mul nsw i64 [[DIV_1]], 4 -// CHECK-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[MUL_1]] -// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 -// CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] -// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* +// OMP45: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 +// OMP45-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 +// OMP45-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] +// OMP45-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 +// OMP45-NEXT: store i32 [[I_2]], i32* +// OMP45: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[DIV_1:%.+]] = sdiv i64 [[IV2_1]], 4 +// OMP45-NEXT: [[MUL_1:%.+]] = mul nsw i64 [[DIV_1]], 4 +// OMP45-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[MUL_1]] +// OMP45-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 +// OMP45-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] +// OMP45-NEXT: store i64 [[J_2_ADD0]], i64* // simd.for.inc: -// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 -// CHECK-NEXT: store i64 [[INC]], i64* -// CHECK-NEXT: br label {{%.+}} -// CHECK: [[T1_END]]: -// CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* {{.+}}, i32 %{{.+}}) -// CHECK: ret void +// OMP45: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 +// OMP45-NEXT: store i64 [[INC]], i64* +// OMP45-NEXT: br label {{%.+}} +// OMP45: [[T1_END]]: +// OMP45: call void @__kmpc_for_static_fini(%struct.ident_t* {{.+}}, i32 %{{.+}}) +// OMP45: ret void // + +// OMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} +// OMP45-DAG: ![[VECT]] = distinct !{![[VECT]], ![[VM:.+]]} +// OMP45-DAG: ![[VM]] = !{!"llvm.loop.vectorize.enable", i1 true} +// OMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} +// OMP50-DAG: ![[VECT]] = distinct !{![[VECT]], ![[VM:.+]]} +// OMP50-DAG: ![[VM]] = !{!"llvm.loop.vectorize.enable", i1 true} +// OMP50-DAG: ![[NOVECT]] = distinct !{![[NOVECT]], ![[NOVM:.+]]} +// OMP50-DAG: ![[NOVM]] = !{!"llvm.loop.vectorize.enable", i1 false} + // TERM_DEBUG-LABEL: bar int bar() {return 0;}; diff --git a/clang/test/OpenMP/taskloop_simd_ast_print.cpp b/clang/test/OpenMP/taskloop_simd_ast_print.cpp index d5403ed06d97f..59144f344949b 
100644 --- a/clang/test/OpenMP/taskloop_simd_ast_print.cpp +++ b/clang/test/OpenMP/taskloop_simd_ast_print.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s --check-prefix CHECK --check-prefix OMP45 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s -DOMP5 | FileCheck %s --check-prefix CHECK --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -DOMP5 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -DOMP5 | FileCheck %s --check-prefix CHECK --check-prefix OMP50 -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s --check-prefix CHECK --check-prefix OMP45 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s -DOMP5 | FileCheck %s --check-prefix CHECK --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -DOMP5 +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -DOMP5 | FileCheck %s --check-prefix CHECK --check-prefix OMP50 // expected-no-diagnostics #ifndef HEADER @@ -69,12 +75,17 @@ int main(int argc, char **argv) { // CHECK-NEXT: for (int i = 0; i < 2; ++i) // CHECK-NEXT: a = 2; #pragma omp parallel +#ifdef OMP5 +#pragma omp taskloop simd private(argc, b), firstprivate(argv, c), lastprivate(d, f) collapse(2) shared(g) if(simd: argc) mergeable priority(argc) simdlen(16) grainsize(argc) reduction(max: a, e) +#else #pragma omp taskloop simd private(argc, b), firstprivate(argv, c), lastprivate(d, f) collapse(2) shared(g) if(argc) mergeable priority(argc) simdlen(16) grainsize(argc) reduction(max: a, e) +#endif // OMP5 for (int i = 0; i < 10; ++i) for (int j = 0; j < 10; ++j) foo(); // CHECK-NEXT: #pragma omp parallel - // CHECK-NEXT: #pragma omp taskloop simd private(argc,b) firstprivate(argv,c) lastprivate(d,f) collapse(2) shared(g) if(argc) mergeable priority(argc) simdlen(16) grainsize(argc) reduction(max: a,e) + // OMP50-NEXT: #pragma omp taskloop simd private(argc,b) firstprivate(argv,c) lastprivate(d,f) collapse(2) shared(g) if(simd: argc) mergeable priority(argc) simdlen(16) grainsize(argc) reduction(max: a,e) + // OMP45-NEXT: #pragma omp taskloop simd private(argc,b) firstprivate(argv,c) lastprivate(d,f) collapse(2) shared(g) if(argc) mergeable priority(argc) simdlen(16) grainsize(argc) reduction(max: a,e) // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: for (int j = 
0; j < 10; ++j) // CHECK-NEXT: foo(); diff --git a/clang/test/OpenMP/taskloop_simd_codegen.cpp b/clang/test/OpenMP/taskloop_simd_codegen.cpp index 4c84eccb4f856..6b8f3543dfb54 100644 --- a/clang/test/OpenMP/taskloop_simd_codegen.cpp +++ b/clang/test/OpenMP/taskloop_simd_codegen.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=45 -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s --check-prefix CHECK --check-prefix OMP45 +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=50 -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix OMP50 -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=45 -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER @@ -45,10 +51,13 @@ int main(int argc, char **argv) { for (int i = 0; i < 10; ++i) ; // CHECK: call void @__kmpc_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) -// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) +// OMP45: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 
80, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) +// OMP50: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 -// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0 +// OMP45: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0 +// OMP50: [[IF_VAL:%.+]] = load i8, i8* % +// OMP50: [[IF:%.+]] = trunc i8 [[IF_VAL]] to i1 // CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 // CHECK: store i64 0, i64* [[DOWN]], diff --git a/clang/test/Parser/cxx-ambig-decl-expr.cpp b/clang/test/Parser/cxx-ambig-decl-expr.cpp index 6507eafb74cd7..02857e21f7c3e 100644 --- a/clang/test/Parser/cxx-ambig-decl-expr.cpp +++ b/clang/test/Parser/cxx-ambig-decl-expr.cpp @@ -17,3 +17,25 @@ auto (*q)() -> int(*)(unknown); // expected-error {{unknown type name 'unknown'} auto (*r)() -> int(*)(unknown + 1); // expected-error {{undeclared identifier 'unknown'}} int f(unknown const x); // expected-error {{unknown type name 'unknown'}} + +// Disambiguating an array declarator from an array subscripting. +void arr() { + int x[] = {1}; // expected-note 2{{previous}} + + // This is array indexing not an array declarator because a comma expression + // is not syntactically a constant-expression. + int(x[1,1]); // expected-warning 2{{unused}} + + // This is array indexing not an array declaration because a braced-init-list + // is not syntactically a constant-expression. + int(x[{0}]); // expected-error {{array subscript is not an integer}} + struct A { + struct Q { int n; }; + int operator[](Q); + } a; + int(a[{0}]); // expected-warning {{unused}} + + // These are array declarations. + int(x[(1,1)]); // expected-error {{redefinition}} + int(x[true ? 
1,1 : 1]); // expected-error {{redefinition}} +} diff --git a/clang/test/Parser/warn-misleading-indentation.cpp b/clang/test/Parser/warn-misleading-indentation.cpp new file mode 100644 index 0000000000000..e5ed8bba93c15 --- /dev/null +++ b/clang/test/Parser/warn-misleading-indentation.cpp @@ -0,0 +1,208 @@ +// RUN: %clang_cc1 -x c -fsyntax-only -verify %s +// RUN: %clang_cc1 -x c -fsyntax-only -verify -Wmisleading-indentation -DWITH_WARN %s +// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify -Wall -Wno-unused -DWITH_WARN -DCXX17 %s +// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify -Wall -Wno-unused -Wno-misleading-indentation -DCXX17 %s + +#ifndef WITH_WARN +// expected-no-diagnostics +#endif + +void f0(int i) { + if (i) +#ifdef WITH_WARN +// expected-note@-2 {{here}} +#endif + i = i + 1; + int x = 0; +#ifdef WITH_WARN +// expected-warning@-2 {{misleading indentation; statement is not part of the previous 'if'}} +#endif + return; +#ifdef CXX17 + if constexpr (false) +#ifdef WITH_WARN +// expected-note@-2 {{here}} +#endif + i = 0; + i += 1; +#ifdef WITH_WARN +// expected-warning@-2 {{misleading indentation; statement is not part of the previous 'if'}} +#endif +#endif +} + +void f1(int i) { + for (;i;) +#ifdef WITH_WARN +// expected-note@-2 {{here}} +#endif + i = i + 1; + i *= 2; +#ifdef WITH_WARN +// expected-warning@-2 {{misleading indentation; statement is not part of the previous 'for'}} +#endif + return; +} + +void f2(int i) { + while (i) +#ifdef WITH_WARN +// expected-note@-2 {{here}} +#endif + i = i + 1; i *= 2; +#ifdef WITH_WARN +// expected-warning@-2 {{misleading indentation; statement is not part of the previous 'while'}} +#endif + return; +} + +void f3(int i) { + if (i) + i = i + 1; + else +#ifdef WITH_WARN +// expected-note@-2 {{here}} +#endif + i *= 2; + const int x = 0; +#ifdef WITH_WARN +// expected-warning@-2 {{misleading indentation; statement is not part of the previous 'else'}} +#endif +} + +#ifdef CXX17 +struct Range { + int *begin() {return nullptr;} + int *end() {return nullptr;} +}; +#endif + +void f4(int i) { + if (i) + i *= 2; + return; + if (i) + i *= 2; + ; + if (i) +#ifdef WITH_WARN +// expected-note@-2 {{here}} +#endif + i *= 2; + typedef int Int; +#ifdef WITH_WARN +// expected-warning@-2 {{misleading indentation; statement is not part of the previous 'if'}} +#endif +#ifdef CXX17 + Range R; + for (auto e : R) +#ifdef WITH_WARN +// expected-note@-2 {{here}} +#endif + i *= 2; + using Int2 = int; +#ifdef WITH_WARN +// expected-warning@-2 {{misleading indentation; statement is not part of the previous 'for'}} +#endif +#endif +} + +int bar(void); + +int foo(int* dst) +{ + if (dst) + return + bar(); + if (dst) + dst = dst + \ + bar(); + return 0; +} + +void g(int i) { + if (1) + i = 2; + else + if (i == 3) +#ifdef WITH_WARN +// expected-note@-3 {{here}} +#endif + i = 4; + i = 5; +#ifdef WITH_WARN +// expected-warning@-2 {{misleading indentation; statement is not part of the previous 'if'}} +#endif +} + +// Or this +#define TEST i = 5 +void g0(int i) { + if (1) + i = 2; + else + i = 5; + TEST; +} + +void g1(int i) { + if (1) + i = 2; + else if (i == 3) +#ifdef WITH_WARN +// expected-note@-2 {{here}} +#endif + i = 4; + i = 5; +#ifdef WITH_WARN +// expected-warning@-2 {{misleading indentation; statement is not part of the previous 'if'}} +#endif +} + +void g2(int i) { + if (1) + i = 2; + else + if (i == 3) + {i = 4;} + i = 5; +} + +void g6(int i) { + if (1) + if (i == 3) +#ifdef WITH_WARN +// expected-note@-2 {{here}} +#endif + i = 4; + i = 5; +#ifdef WITH_WARN +// 
expected-warning@-2 {{misleading indentation; statement is not part of the previous 'if'}} +#endif +} + +void g7(int i) { + if (1) + i = 4; +#ifdef TEST1 +#endif + i = 5; +} + +void a1(int i) { if (1) i = 4; return; } + +void a2(int i) { + { + if (1) + i = 4; + } + return; +} + +void a3(int i) { + if (1) + { + i = 4; + } + return; +} \ No newline at end of file diff --git a/clang/test/Preprocessor/file_test.c b/clang/test/Preprocessor/file_test.c new file mode 100644 index 0000000000000..3788db6eb090e --- /dev/null +++ b/clang/test/Preprocessor/file_test.c @@ -0,0 +1,23 @@ +// XFAIL: system-windows +// RUN: %clang -E -ffile-prefix-map=%p=/UNLIKELY_PATH/empty -c -o - %s | FileCheck %s +// RUN: %clang -E -fmacro-prefix-map=%p=/UNLIKELY_PATH/empty -c -o - %s | FileCheck %s +// RUN: %clang -E -fmacro-prefix-map=%p=/UNLIKELY_PATH=empty -c -o - %s | FileCheck %s -check-prefix CHECK-EVIL +// RUN: %clang -E -fmacro-prefix-map=%p/= -c -o - %s | FileCheck %s --check-prefix CHECK-REMOVE + +filename: __FILE__ +#include "file_test.h" + +// CHECK: filename: "/UNLIKELY_PATH/empty{{/|\\\\}}file_test.c" +// CHECK: filename: "/UNLIKELY_PATH/empty{{/|\\\\}}file_test.h" +// CHECK: basefile: "/UNLIKELY_PATH/empty{{/|\\\\}}file_test.c" +// CHECK-NOT: filename: + +// CHECK-EVIL: filename: "/UNLIKELY_PATH=empty{{/|\\\\}}file_test.c" +// CHECK-EVIL: filename: "/UNLIKELY_PATH=empty{{/|\\\\}}file_test.h" +// CHECK-EVIL: basefile: "/UNLIKELY_PATH=empty{{/|\\\\}}file_test.c" +// CHECK-EVIL-NOT: filename: + +// CHECK-REMOVE: filename: "file_test.c" +// CHECK-REMOVE: filename: "file_test.h" +// CHECK-REMOVE: basefile: "file_test.c" +// CHECK-REMOVE-NOT: filename: diff --git a/clang/test/Preprocessor/file_test.h b/clang/test/Preprocessor/file_test.h new file mode 100644 index 0000000000000..c289e5c836280 --- /dev/null +++ b/clang/test/Preprocessor/file_test.h @@ -0,0 +1,2 @@ +filename: __FILE__ +basefile: __BASE_FILE__ diff --git a/clang/test/Preprocessor/predefined-win-macros.c b/clang/test/Preprocessor/predefined-win-macros.c index 6034c085024dd..928ca6f4fa8d9 100644 --- a/clang/test/Preprocessor/predefined-win-macros.c +++ b/clang/test/Preprocessor/predefined-win-macros.c @@ -47,7 +47,7 @@ // RUN: %clang_cc1 %s -x c++ -E -dM -triple i686-pc-win32 -fms-extensions -fms-compatibility \ // RUN: -fms-compatibility-version=19.00 -std=c++2a -o - | FileCheck -match-full-lines %s --check-prefix=CHECK-MS-CPP2A // CHECK-MS-CPP2A: #define _MSC_VER 1900 -// CHECK-MS-CPP2A: #define _MSVC_LANG 201704L +// CHECK-MS-CPP2A: #define _MSVC_LANG 201705L // RUN: %clang_cc1 -triple i386-windows %s -E -dM -o - \ // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-X86-WIN diff --git a/clang/test/Sema/arm-global-regs.c b/clang/test/Sema/arm-global-regs.c deleted file mode 100644 index 753cb60e68388..0000000000000 --- a/clang/test/Sema/arm-global-regs.c +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: %clang_cc1 -ffreestanding -fsyntax-only -target-feature +reserve-r9 -verify -triple arm-arm-none-eabi %s - -// Check a small subset of valid and invalid global register variable declarations. -// Also check that for global register variables without -ffixed-reg options it throws an error. 
-
-register unsigned arm_r3 __asm("r3"); //expected-error {{register 'r3' unsuitable for global register variables on this target}}
-
-register unsigned arm_r12 __asm("r12"); //expected-error {{register 'r12' unsuitable for global register variables on this target}}
-
-register unsigned arm_r5 __asm("r5"); //expected-error {{register 'r5' unsuitable for global register variables on this target}}
-
-register unsigned arm_r9 __asm("r9");
-
-register unsigned arm_r6 __asm("r6"); //expected-error {{-ffixed-r6 is required for global named register variable declaration}}
-
-register unsigned arm_r7 __asm("r7"); //expected-error {{-ffixed-r7 is required for global named register variable declaration}}
-
-register unsigned *parm_r7 __asm("r7"); //expected-error {{-ffixed-r7 is required for global named register variable declaration}}
-
-register unsigned arm_sp __asm("sp");
diff --git a/clang/test/Sema/builtins-mips-features.c b/clang/test/Sema/builtins-mips-features.c
new file mode 100644
index 0000000000000..4ea36d7f24dc0
--- /dev/null
+++ b/clang/test/Sema/builtins-mips-features.c
@@ -0,0 +1,37 @@
+// REQUIRES: mips-registered-target
+// RUN: %clang_cc1 -triple mips64 -fsyntax-only -verify %s
+
+typedef signed char v4i8 __attribute__ ((vector_size(4)));
+typedef signed char v4q7 __attribute__ ((vector_size(4)));
+typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));
+
+void dsp() {
+  v4i8 a;
+  void* p;
+
+  // expected-error@+1 {{this builtin requires 'dsp' ASE, please use -mdsp}}
+  __builtin_mips_addu_qb(a, a);
+  // expected-error@+1 {{this builtin requires 'dsp' ASE, please use -mdsp}}
+  __builtin_mips_lwx(p, 32);
+}
+
+void dspr2() {
+  v4i8 a;
+  v4q7 b;
+
+  // expected-error@+1 {{this builtin requires 'dsp r2' ASE, please use -mdspr2}}
+  __builtin_mips_absq_s_qb(b);
+  // expected-error@+1 {{this builtin requires 'dsp r2' ASE, please use -mdspr2}}
+  __builtin_mips_subuh_r_qb(a, a);
+}
+
+void msa() {
+  v16i8 a;
+  v16u8 b;
+
+  // expected-error@+1 {{this builtin requires 'msa' ASE, please use -mmsa}}
+  __builtin_msa_add_a_b(a, a);
+  // expected-error@+1 {{this builtin requires 'msa' ASE, please use -mmsa}}
+  __builtin_msa_xori_b(b, 5);
+}
diff --git a/clang/test/Sema/eval-info.c b/clang/test/Sema/eval-info.c
new file mode 100644
index 0000000000000..7f4de4b908207
--- /dev/null
+++ b/clang/test/Sema/eval-info.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s -fsyntax-only -triple x86_64-unknown-windows-msvc -verify
+
+// expected-no-diagnostics
+
+// Make sure the new constant interpreter is not enabled unintentionally,
+// which would cause an assertion.
+typedef enum x {
+  a = 1,
+} x;
diff --git a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
index 8ba7686944468..3319d5aa2db8c 100644
--- a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
+++ b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
@@ -450,3 +450,8 @@ MyIntPointer handleDerivedToBaseCast1(MySpecialIntPointer ptr) {
 MyIntPointer handleDerivedToBaseCast2(MyOwnerIntPointer ptr) {
   return ptr; // expected-warning {{address of stack memory associated with parameter 'ptr' returned}}
 }
+
+std::vector<int *>::iterator noFalsePositiveWithVectorOfPointers() {
+  std::vector<std::vector<int *>::iterator> iters;
+  return iters.at(0);
+}
diff --git a/clang/test/SemaCXX/constant-expression-cxx2a.cpp b/clang/test/SemaCXX/constant-expression-cxx2a.cpp
index 8db705dcdc67f..c2e443b9bec10 100644
--- a/clang/test/SemaCXX/constant-expression-cxx2a.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx2a.cpp
@@ -18,6 +18,7 @@ namespace std {
 [[nodiscard]] void *operator new(std::size_t, std::align_val_t, const std::nothrow_t&) noexcept;
 [[nodiscard]] void *operator new[](std::size_t, const std::nothrow_t&) noexcept;
 [[nodiscard]] void *operator new[](std::size_t, std::align_val_t, const std::nothrow_t&) noexcept;
+[[nodiscard]] void *operator new[](std::size_t, std::align_val_t);
 void operator delete(void*, const std::nothrow_t&) noexcept;
 void operator delete(void*, std::align_val_t, const std::nothrow_t&) noexcept;
 void operator delete[](void*, const std::nothrow_t&) noexcept;
@@ -1050,7 +1051,7 @@ namespace dynamic_alloc {
     // Ensure that we don't try to evaluate these for overflow and crash. These
     // are all value-dependent expressions.
     p = new char[n];
-    p = new (n) char[n];
+    p = new ((std::align_val_t)n) char[n];
     p = new char(n);
   }
 }
diff --git a/clang/test/SemaCXX/deprecated-copy.cpp b/clang/test/SemaCXX/deprecated-copy.cpp
new file mode 100644
index 0000000000000..4d3e798d912ba
--- /dev/null
+++ b/clang/test/SemaCXX/deprecated-copy.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -std=c++11 %s -Wdeprecated-copy -verify
+// RUN: %clang_cc1 -std=c++11 %s -Wdeprecated-copy-dtor -DDEPRECATED_COPY_DTOR -verify
+// RUN: %clang_cc1 -std=c++11 %s -Wextra -verify
+
+#ifdef DEPRECATED_COPY_DTOR
+struct A {
+  int *ptr;
+  ~A() { delete ptr; } // expected-warning {{definition of implicit copy constructor for 'A' is deprecated because it has a user-declared destructor}}
+};
+
+void foo() {
+  A a{};
+  A b = a; // expected-note {{implicit copy constructor for 'A' first required here}}
+}
+#else
+struct B {
+  B &operator=(const B &); // expected-warning {{definition of implicit copy constructor for 'B' is deprecated because it has a user-declared copy assignment operator}}
+};
+
+void bar() {
+  B b1, b2(b1); // expected-note {{implicit copy constructor for 'B' first required here}}
+}
+#endif
diff --git a/clang/test/SemaCXX/lambda-expressions.cpp b/clang/test/SemaCXX/lambda-expressions.cpp
index 5fff855102fb1..0f4edc4d1f343 100644
--- a/clang/test/SemaCXX/lambda-expressions.cpp
+++ b/clang/test/SemaCXX/lambda-expressions.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++14 -Wno-unused-value -fsyntax-only -verify -fblocks %s
+// RUN: %clang_cc1 -std=c++14 -Wno-unused-value -fsyntax-only -verify -verify=expected-cxx14 -fblocks %s
+// RUN: %clang_cc1 -std=c++17 -Wno-unused-value -fsyntax-only -verify -fblocks %s
 
 namespace std { class type_info; };
 
@@ -12,6 +13,7 @@ namespace ExplicitCapture {
     void ImplicitThisCapture() {
       [](){(void)Member;}; // expected-error {{'this' cannot be implicitly captured in this context}}
+      const int var = [](){(void)Member; return 0;}(); // expected-error {{'this' cannot be implicitly captured in this context}}
 
       [&](){(void)Member;};
       [this](){(void)Member;};
@@ -105,7 +107,7 @@ namespace SpecialMembers {
       a = static_cast<decltype(a)&&>(a); // expected-error {{copy assignment operator is implicitly deleted}}
     }
     struct P {
-      P(const P&) = delete; // expected-note 2{{deleted here}}
+      P(const P&) = delete; //expected-note {{deleted here}} // expected-cxx14-note {{deleted here}}
     };
     struct Q {
       ~Q() = delete; // expected-note {{deleted here}}
@@ -118,8 +120,8 @@
   };
   void g(P &p, Q &q, R &r) {
     // FIXME: The note attached to the second error here is just amazingly bad.
-    auto pp = [p]{}; // expected-error {{deleted constructor}} expected-error {{deleted copy constructor of '(lambda}}
-    // expected-note@-1 {{copy constructor of '' is implicitly deleted because field '' has a deleted copy constructor}}
+    auto pp = [p]{}; // expected-error {{deleted constructor}} expected-cxx14-error {{deleted copy constructor of '(lambda}}
+    // expected-cxx14-note@-1 {{copy constructor of '' is implicitly deleted because field '' has a deleted copy constructor}}
    auto qq = [q]{}; // expected-error {{deleted function}} expected-note {{because}}
 
    auto a = [r]{}; // expected-note 2{{here}}
@@ -365,7 +367,7 @@ namespace PR18128 {
     int (*f())[true ? 1 : ([=]{ return n; }(), 0)];
    // expected-error@-1 {{non-local lambda expression cannot have a capture-default}}
    // expected-error@-2 {{invalid use of non-static data member 'n'}}
-   // expected-error@-3 {{a lambda expression may not appear inside of a constant expression}}
+   // expected-cxx14-error@-3 {{a lambda expression may not appear inside of a constant expression}}
    int g(int k = ([=]{ return n; }(), 0));
    // expected-error@-1 {{non-local lambda expression cannot have a capture-default}}
    // expected-error@-2 {{invalid use of non-static data member 'n'}}
@@ -596,8 +598,13 @@ namespace ConversionOperatorDoesNotHaveDeducedReturnType {
     using ExpectedTypeU = void (*)(T&);
 
   struct X {
+#if __cplusplus > 201402L
+    friend constexpr auto T::operator()(int) const;
+    friend constexpr T::operator ExpectedTypeT() const noexcept;
+#else
     friend auto T::operator()(int) const;
     friend T::operator ExpectedTypeT() const;
+#endif
 
     // FIXME: The first of these should match. The second should not.
template diff --git a/clang/test/SemaOpenCL/address-spaces.cl b/clang/test/SemaOpenCL/address-spaces.cl index b39a30372fbfd..a28069470177c 100644 --- a/clang/test/SemaOpenCL/address-spaces.cl +++ b/clang/test/SemaOpenCL/address-spaces.cl @@ -242,18 +242,25 @@ void func_multiple_addr(void) { __private private_int_t *var6;// expected-warning {{multiple identical address spaces specified for type}} } +void func_with_array_param(const unsigned data[16]); + +__kernel void k() { + unsigned data[16]; + func_with_array_param(data); +} + void func_multiple_addr2(void) { typedef __private int private_int_t; - __private __attribute__((ocl_global)) int var1; // expected-error {{multiple address spaces specified for type}} - __private __attribute__((ocl_global)) int *var2; // expected-error {{multiple address spaces specified for type}} - __attribute__((ocl_global)) private_int_t var3; // expected-error {{multiple address spaces specified for type}} - __attribute__((ocl_global)) private_int_t *var4; // expected-error {{multiple address spaces specified for type}} - __attribute__((ocl_private)) private_int_t var5; // expected-warning {{multiple identical address spaces specified for type}} - __attribute__((ocl_private)) private_int_t *var6; // expected-warning {{multiple identical address spaces specified for type}} + __private __attribute__((opencl_global)) int var1; // expected-error {{multiple address spaces specified for type}} + __private __attribute__((opencl_global)) int *var2; // expected-error {{multiple address spaces specified for type}} + __attribute__((opencl_global)) private_int_t var3; // expected-error {{multiple address spaces specified for type}} + __attribute__((opencl_global)) private_int_t *var4; // expected-error {{multiple address spaces specified for type}} + __attribute__((opencl_private)) private_int_t var5; // expected-warning {{multiple identical address spaces specified for type}} + __attribute__((opencl_private)) private_int_t *var6; // expected-warning {{multiple identical address spaces specified for type}} #if __OPENCL_CPP_VERSION__ - [[clang::ocl_private]] __global int var7; // expected-error {{multiple address spaces specified for type}} - [[clang::ocl_private]] __global int *var8; // expected-error {{multiple address spaces specified for type}} - [[clang::ocl_private]] private_int_t var9; // expected-warning {{multiple identical address spaces specified for type}} - [[clang::ocl_private]] private_int_t *var10; // expected-warning {{multiple identical address spaces specified for type}} + [[clang::opencl_private]] __global int var7; // expected-error {{multiple address spaces specified for type}} + [[clang::opencl_private]] __global int *var8; // expected-error {{multiple address spaces specified for type}} + [[clang::opencl_private]] private_int_t var9; // expected-warning {{multiple identical address spaces specified for type}} + [[clang::opencl_private]] private_int_t *var10; // expected-warning {{multiple identical address spaces specified for type}} #endif // !__OPENCL_CPP_VERSION__ } diff --git a/clang/test/SemaOpenCL/event_t.cl b/clang/test/SemaOpenCL/event_t.cl index e7daf88576cc5..ab7f09170e9cf 100644 --- a/clang/test/SemaOpenCL/event_t.cl +++ b/clang/test/SemaOpenCL/event_t.cl @@ -1,6 +1,6 @@ // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -event_t glb_evt; // expected-error {{the 'event_t' type cannot be used to declare a program scope variable}} +event_t glb_evt; // expected-error {{the 'event_t' type cannot be used to declare a program scope 
variable}} expected-error{{program scope variable must reside in constant address space}} constant struct evt_s { event_t evt; // expected-error {{the 'event_t' type cannot be used to declare a structure or union field}} @@ -10,7 +10,7 @@ void foo(event_t evt); // expected-note {{passing argument to parameter 'evt' he void kernel ker(event_t argevt) { // expected-error {{'event_t' cannot be used as the type of a kernel parameter}} event_t e; - constant event_t const_evt; // expected-error {{the event_t type can only be used with __private address space qualifier}} + constant event_t const_evt; // expected-error {{the event_t type can only be used with __private address space qualifier}} expected-error{{variable in constant address space must be initialized}} foo(e); foo(0); foo(5); // expected-error {{passing 'int' to parameter of incompatible type 'event_t'}} diff --git a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl index 97a01a1fe9311..589d04c64e82d 100644 --- a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl +++ b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl @@ -32,6 +32,7 @@ typedef float float4 __attribute__((ext_vector_type(4))); typedef half half4 __attribute__((ext_vector_type(4))); typedef int int2 __attribute__((ext_vector_type(2))); typedef int int4 __attribute__((ext_vector_type(4))); +typedef uint uint4 __attribute__((ext_vector_type(4))); typedef long long2 __attribute__((ext_vector_type(2))); #endif @@ -67,6 +68,13 @@ char4 test_int(char c, char4 c4) { return max(c4, c); } +kernel void basic_vector_misc(float4 a) { + float4 res; + uint4 mask = (uint4)(1, 2, 3, 4); + + res = shuffle(a, mask); +} + kernel void basic_image_readonly(read_only image2d_t image_read_only_image2d) { int2 i2; sampler_t sampler; diff --git a/clang/test/SemaOpenCL/invalid-block.cl b/clang/test/SemaOpenCL/invalid-block.cl index 5d6dc380a37a1..7cbcea96d0acf 100644 --- a/clang/test/SemaOpenCL/invalid-block.cl +++ b/clang/test/SemaOpenCL/invalid-block.cl @@ -58,11 +58,11 @@ void f5(int i) { : bl2(i); // expected-error {{block type cannot be used as expression in ternary expression in OpenCL}} } // A block pointer type and all pointer operations are disallowed -void f6(bl2_t *bl_ptr) { // expected-error{{pointer to type '__generic bl2_t' (aka 'int (__generic ^const __generic)(int)') is invalid in OpenCL}} +void f6(bl2_t *bl_ptr) { // expected-error{{pointer to type 'bl2_t' (aka 'int (__generic ^const)(int)') is invalid in OpenCL}} bl2_t bl = ^(int i) { return 1; }; - bl2_t *p; // expected-error {{pointer to type '__generic bl2_t' (aka 'int (__generic ^const __generic)(int)') is invalid in OpenCL}} + bl2_t *p; // expected-error {{pointer to type 'bl2_t' (aka 'int (__generic ^const)(int)') is invalid in OpenCL}} *bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (__generic ^const)(int)') to unary expression}} &bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (__generic ^const)(int)') to unary expression}} } diff --git a/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl b/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl index 69fa2b6da823f..de1b4f8858fa0 100644 --- a/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl +++ b/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl @@ -4,7 +4,7 @@ global pipe int gp; // expected-error {{type '__global read_only pipe int' can only be used as a function parameter in OpenCL}} global reserve_id_t rid; // expected-error {{the '__global reserve_id_t' type cannot be used to declare a program scope 
variable}}
 
-extern pipe write_only int get_pipe(); // expected-error-re{{type '__global write_only pipe int ({{(void)?}})' can only be used as a function parameter in OpenCL}}
+extern pipe write_only int get_pipe(); // expected-error-re{{type '__global write_only pipe int ({{(void)?}})' can only be used as a function parameter in OpenCL}} expected-error{{'write_only' attribute only applies to parameters and typedefs}}
 
 kernel void test_invalid_reserved_id(reserve_id_t ID) { // expected-error {{'reserve_id_t' cannot be used as the type of a kernel parameter}}
 }
diff --git a/clang/test/SemaOpenCL/sampler_t.cl b/clang/test/SemaOpenCL/sampler_t.cl
index fe9d997c89607..888e973cc31d8 100644
--- a/clang/test/SemaOpenCL/sampler_t.cl
+++ b/clang/test/SemaOpenCL/sampler_t.cl
@@ -48,6 +48,9 @@ constant struct sampler_s {
 sampler_t bad(void); //expected-error{{declaring function return value of type 'sampler_t' is not allowed}}
 
 sampler_t global_nonconst_smp = 0; // expected-error {{global sampler requires a const or constant address space qualifier}}
+#ifdef CHECK_SAMPLER_VALUE
+// expected-warning@-2{{sampler initializer has invalid Filter Mode bits}}
+#endif
 
 const sampler_t glb_smp10 = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
 
 const constant sampler_t glb_smp11 = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
@@ -62,7 +65,7 @@ void kernel ker(sampler_t argsmp) {
 }
 
 #if __OPENCL_C_VERSION__ == 200
-void bad(sampler_t*); // expected-error{{pointer to type '__generic sampler_t' is invalid in OpenCL}}
+void bad(sampler_t *); // expected-error{{pointer to type 'sampler_t' is invalid in OpenCL}}
 #else
 void bad(sampler_t*); // expected-error{{pointer to type 'sampler_t' is invalid in OpenCL}}
 #endif
diff --git a/clang/test/SemaOpenCLCXX/address-space-deduction.cl b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
index ac6b2cabbd0cb..9bffeafb1c2db 100644
--- a/clang/test/SemaOpenCLCXX/address-space-deduction.cl
+++ b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
@@ -65,30 +65,42 @@ template <class T>
 x3<T>::x3(const x3<T> &t) {}
 
 template <class T>
-T xxx(T *in) {
+T xxx(T *in1, T in2) {
   // This pointer can't be deduced to generic because addr space
   // will be taken from the template argument.
   //CHECK: `-VarDecl {{.*}} i 'T *' cinit
-  T *i = in;
+  T *i = in1;
   T ii;
+  __private T *ptr = &ii;
+  ptr = &in2;
   return *i;
 }
 
 __kernel void test() {
   int foo[10];
-  xxx(&foo[0]);
+  xxx<__private int>(&foo[0], foo[0]);
+  // FIXME: Template param deduction fails here because
+  // temporaries are not in the __private address space.
+  // It is probably reasonable to put them in __private
+  // considering that stack and function params are
+  // implicitly in __private.
+  // However, if temporaries are left in default addr
+  // space we should at least pretty print the __private
+  // addr space. Otherwise the diagnostic appears to be
+  // confusing.
+ //xxx(&foo[0], foo[0]); } // Addr space for pointer/reference to an array -//CHECK: FunctionDecl {{.*}} t1 'void (const __generic float (&)[2])' +//CHECK: FunctionDecl {{.*}} t1 'void (const float (__generic &)[2])' void t1(const float (&fYZ)[2]); -//CHECK: FunctionDecl {{.*}} t2 'void (const __generic float (*)[2])' +//CHECK: FunctionDecl {{.*}} t2 'void (const float (__generic *)[2])' void t2(const float (*fYZ)[2]); -//CHECK: FunctionDecl {{.*}} t3 'void (__generic float (((*)))[2])' +//CHECK: FunctionDecl {{.*}} t3 'void (float (((__generic *)))[2])' void t3(float(((*fYZ)))[2]); -//CHECK: FunctionDecl {{.*}} t4 'void (__generic float (((*__generic *)))[2])' +//CHECK: FunctionDecl {{.*}} t4 'void (float (((__generic *__generic *)))[2])' void t4(float(((**fYZ)))[2]); -//CHECK: FunctionDecl {{.*}} t5 'void (__generic float (*__generic (*))[2])' +//CHECK: FunctionDecl {{.*}} t5 'void (float (__generic *(__generic *))[2])' void t5(float (*(*fYZ))[2]); __kernel void k() { diff --git a/clang/test/SemaOpenCLCXX/address-space-lambda.cl b/clang/test/SemaOpenCLCXX/address-space-lambda.cl new file mode 100644 index 0000000000000..cf87bfaeede29 --- /dev/null +++ b/clang/test/SemaOpenCLCXX/address-space-lambda.cl @@ -0,0 +1,53 @@ +//RUN: %clang_cc1 %s -cl-std=clc++ -pedantic -ast-dump -verify | FileCheck %s + +//CHECK: CXXMethodDecl {{.*}} constexpr operator() 'int (int) const __generic' +auto glambda = [](auto a) { return a; }; + +__kernel void test() { + int i; +//CHECK: CXXMethodDecl {{.*}} constexpr operator() 'void () const __generic' + auto llambda = [&]() {i++;}; + llambda(); + glambda(1); + // Test lambda with default parameters +//CHECK: CXXMethodDecl {{.*}} constexpr operator() 'void () const __generic' + [&] {i++;} (); + __constant auto err = [&]() {}; //expected-note-re{{candidate function not viable: address space mismatch in 'this' argument ('__constant (lambda at {{.*}})'), parameter type must be 'const __generic (lambda at {{.*}})'}} + err(); //expected-error-re{{no matching function for call to object of type '__constant (lambda at {{.*}})'}} + // FIXME: There is very limited addr space functionality + // we can test when taking lambda type from the object. + // The limitation is due to addr spaces being added to all + // objects in OpenCL. Once we add metaprogramming utility + // for removing address spaces from a type we can enhance + // testing here. 
+ (*(__constant decltype(llambda) *)nullptr)(); //expected-error{{multiple address spaces specified for type}} + (*(decltype(llambda) *)nullptr)(); +} + +__kernel void test_qual() { +//CHECK: |-CXXMethodDecl {{.*}} constexpr operator() 'void () const' + auto priv1 = []() __private {}; + priv1(); +//CHECK: |-CXXMethodDecl {{.*}} constexpr operator() 'void () const __generic' + auto priv2 = []() __generic {}; + priv2(); + auto priv3 = []() __global {}; //expected-note-re{{candidate function not viable: address space mismatch in 'this' argument ('(lambda at {{.*}})'), parameter type must be 'const __global (lambda at {{.*}})'}} //expected-note{{conversion candidate of type 'void (*)()'}} + priv3(); //expected-error{{no matching function for call to object of type}} + + __constant auto const1 = []() __private{}; //expected-note-re{{candidate function not viable: address space mismatch in 'this' argument ('__constant (lambda at {{.*}})'), parameter type must be 'const (lambda at {{.*}}'}} //expected-note{{conversion candidate of type 'void (*)()'}} + const1(); //expected-error{{no matching function for call to object of type '__constant (lambda at}} + __constant auto const2 = []() __generic{}; //expected-note-re{{candidate function not viable: address space mismatch in 'this' argument ('__constant (lambda at {{.*}})'), parameter type must be 'const __generic (lambda at {{.*}}'}} //expected-note{{conversion candidate of type 'void (*)()'}} + const2(); //expected-error{{no matching function for call to object of type '__constant (lambda at}} +//CHECK: |-CXXMethodDecl {{.*}} constexpr operator() 'void () const __constant' + __constant auto const3 = []() __constant{}; + const3(); + + [&] () __global {} (); //expected-error{{no matching function for call to object of type '(lambda at}} expected-note-re{{candidate function not viable: address space mismatch in 'this' argument ('(lambda at {{.*}})'), parameter type must be 'const __global (lambda at {{.*}})'}} + [&] () __private {} (); //expected-error{{no matching function for call to object of type '(lambda at}} expected-note-re{{candidate function not viable: address space mismatch in 'this' argument ('(lambda at {{.*}})'), parameter type must be 'const (lambda at {{.*}})'}} + + [&] __private {} (); //expected-error{{lambda requires '()' before attribute specifier}} expected-error{{expected body of lambda expression}} + + [&] () mutable __private {} (); + [&] () __private mutable {} (); //expected-error{{expected body of lambda expression}} +} + diff --git a/clang/test/SemaOpenCLCXX/addrspace-auto.cl b/clang/test/SemaOpenCLCXX/addrspace-auto.cl new file mode 100644 index 0000000000000..56fd9eb58ddc4 --- /dev/null +++ b/clang/test/SemaOpenCLCXX/addrspace-auto.cl @@ -0,0 +1,35 @@ +//RUN: %clang_cc1 %s -cl-std=clc++ -pedantic -ast-dump -verify | FileCheck %s + +__constant int i = 1; +//CHECK: |-VarDecl {{.*}} ai '__global int':'__global int' +auto ai = i; + +kernel void test() { + int i; + //CHECK: VarDecl {{.*}} ai 'int':'int' + auto ai = i; + + constexpr int c = 1; + //CHECK: VarDecl {{.*}} used cai '__constant int':'__constant int' + __constant auto cai = c; + //CHECK: VarDecl {{.*}} aii 'int':'int' + auto aii = cai; + + //CHECK: VarDecl {{.*}} ref 'int &' + auto &ref = i; + //CHECK: VarDecl {{.*}} ptr 'int *' + auto *ptr = &i; + //CHECK: VarDecl {{.*}} ref_c '__constant int &' + auto &ref_c = cai; + + //CHECK: VarDecl {{.*}} ptrptr 'int *__generic *' + auto **ptrptr = &ptr; + //CHECK: VarDecl {{.*}} refptr 'int *__generic &' + auto *&refptr = ptr; 
+
+  //CHECK: VarDecl {{.*}} invalid gref '__global auto &'
+  __global auto &gref = i; //expected-error{{variable 'gref' with type '__global auto &' has incompatible initializer of type 'int'}}
+  __local int *ptr_l;
+  //CHECK: VarDecl {{.*}} invalid gptr '__global auto *'
+  __global auto *gptr = ptr_l; //expected-error{{variable 'gptr' with type '__global auto *' has incompatible initializer of type '__local int *'}}
+}
diff --git a/clang/test/SemaOpenCLCXX/restricted.cl b/clang/test/SemaOpenCLCXX/restricted.cl
index fc4938df5bf1e..c00c634073fe7 100644
--- a/clang/test/SemaOpenCLCXX/restricted.cl
+++ b/clang/test/SemaOpenCLCXX/restricted.cl
@@ -32,12 +32,14 @@ B *test_dynamic_cast(B *p) {
 __constant _Thread_local int a = 1;
 // expected-error@-1 {{C++ for OpenCL version 1.0 does not support the '_Thread_local' storage class specifier}}
 // expected-warning@-2 {{'_Thread_local' is a C11 extension}}
-
+// expected-error@-3 {{thread-local storage is not supported for the current target}}
 __constant __thread int b = 2;
 // expected-error@-1 {{C++ for OpenCL version 1.0 does not support the '__thread' storage class specifier}}
+// expected-error@-2 {{thread-local storage is not supported for the current target}}
 
 kernel void test_storage_classes() {
   register int x;
   // expected-error@-1 {{C++ for OpenCL version 1.0 does not support the 'register' storage class specifier}}
   thread_local int y;
   // expected-error@-1 {{C++ for OpenCL version 1.0 does not support the 'thread_local' storage class specifier}}
+  // expected-error@-2 {{thread-local storage is not supported for the current target}}
 }
diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp
index 937e2736498a2..43ea3ebbf2f63 100644
--- a/clang/test/SemaSYCL/Inputs/sycl.hpp
+++ b/clang/test/SemaSYCL/Inputs/sycl.hpp
@@ -57,17 +57,17 @@ struct DeviceValueType;
 template <typename dataT> struct DeviceValueType<dataT, access::address_space::global_space> {
-  using type = __attribute__((ocl_global)) dataT;
+  using type = __attribute__((opencl_global)) dataT;
 };
 
 template <typename dataT> struct DeviceValueType<dataT, access::address_space::constant_space> {
-  using type = __attribute__((ocl_constant)) dataT;
+  using type = __attribute__((opencl_constant)) dataT;
 };
 
 template <typename dataT> struct DeviceValueType<dataT, access::address_space::local_space> {
-  using type = __attribute__((ocl_local)) dataT;
+  using type = __attribute__((opencl_local)) dataT;
 };
 
 template <typename T> void tmpl(T *t){}
 void usages() {
-  __attribute__((ocl_global)) int *GLOB;
-  __attribute__((ocl_private)) int *PRIV;
+  __attribute__((opencl_global)) int *GLOB;
+  __attribute__((opencl_private)) int *PRIV;
   __attribute__((address_space(3))) int *LOC;
   int *NoAS;
diff --git a/clang/test/SemaSYCL/intel-fpga-local.cpp b/clang/test/SemaSYCL/intel-fpga-local.cpp
index b9d515ff2d606..817d0ded3488b 100644
--- a/clang/test/SemaSYCL/intel-fpga-local.cpp
+++ b/clang/test/SemaSYCL/intel-fpga-local.cpp
@@ -494,7 +494,7 @@ void foo1()
 
 //expected-error@+1{{attribute only applies to local non-const variables and non-static data members}}
 [[intelfpga::max_private_copies(8)]]
-__attribute__((ocl_constant)) unsigned int ext_two[64] = { 1, 2, 3 };
+__attribute__((opencl_constant)) unsigned int ext_two[64] = { 1, 2, 3 };
 
 void other2()
 {
diff --git a/clang/test/SemaTemplate/dependent-names.cpp b/clang/test/SemaTemplate/dependent-names.cpp
index 67ef238083f04..a8de159a1d463 100644
--- a/clang/test/SemaTemplate/dependent-names.cpp
+++ b/clang/test/SemaTemplate/dependent-names.cpp
@@ -273,9 +273,6 @@ namespace PR10187 {
       }
       int e[10];
     };
-    void g() {
-      S<int>().f(); // expected-note {{here}}
-    }
   }
 
   namespace A2 {
diff --git a/clang/test/SemaTemplate/enum-argument.cpp b/clang/test/SemaTemplate/enum-argument.cpp
b/clang/test/SemaTemplate/enum-argument.cpp index 7ff4196139901..a79ed8403e9f4 100644 --- a/clang/test/SemaTemplate/enum-argument.cpp +++ b/clang/test/SemaTemplate/enum-argument.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -fsyntax-only -verify %s -// expected-no-diagnostics enum Enum { val = 1 }; template struct C { @@ -31,7 +30,7 @@ namespace rdar8020920 { unsigned long long bitfield : e0; void f(int j) { - bitfield + j; + bitfield + j; // expected-warning {{expression result unused}} } }; } diff --git a/clang/test/SemaTemplate/member-access-expr.cpp b/clang/test/SemaTemplate/member-access-expr.cpp index 8dba2e68d6562..ef10d72a0ef80 100644 --- a/clang/test/SemaTemplate/member-access-expr.cpp +++ b/clang/test/SemaTemplate/member-access-expr.cpp @@ -156,7 +156,7 @@ namespace test6 { void get(B **ptr) { // It's okay if at some point we figure out how to diagnose this // at instantiation time. - *ptr = field; + *ptr = field; // expected-error {{assigning to 'test6::B *' from incompatible type 'test6::A *}} } }; } diff --git a/clang/test/SemaTemplate/non-integral-switch-cond.cpp b/clang/test/SemaTemplate/non-integral-switch-cond.cpp new file mode 100644 index 0000000000000..23c8e0ef8d4e1 --- /dev/null +++ b/clang/test/SemaTemplate/non-integral-switch-cond.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +struct NOT_AN_INTEGRAL_TYPE {}; + +template +struct foo { + NOT_AN_INTEGRAL_TYPE Bad; + void run() { + switch (Bad) { // expected-error {{statement requires expression of integer type ('NOT_AN_INTEGRAL_TYPE' invalid)}} + case 0: + break; + } + } +}; diff --git a/clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp b/clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp index d73a88777d0c8..7a58dd5dcaeda 100644 --- a/clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp +++ b/clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp @@ -393,3 +393,12 @@ namespace PR42362 { template struct Z::Q {}; Z::Q q; } + +namespace FunctionConversion { + struct a { void c(char *) noexcept; }; + template void g() { + using T = decltype(f); + using T = void (a::*)(char*); // (not 'noexcept') + } + template void g<&a::c>(); +} diff --git a/clang/test/VFS/external-names.c b/clang/test/VFS/external-names.c index 1e12c930c35ed..0500611c3e408 100644 --- a/clang/test/VFS/external-names.c +++ b/clang/test/VFS/external-names.c @@ -1,5 +1,5 @@ -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" -e "s@EXTERNAL_NAMES@true@" %S/Inputs/use-external-names.yaml > %t.external.yaml -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" -e "s@EXTERNAL_NAMES@false@" %S/Inputs/use-external-names.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" -e "s@EXTERNAL_NAMES@true@" %S/Inputs/use-external-names.yaml > %t.external.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" -e "s@EXTERNAL_NAMES@false@" %S/Inputs/use-external-names.yaml > %t.yaml #include "external-names.h" #ifdef REINCLUDE diff --git a/clang/test/VFS/framework-import.m b/clang/test/VFS/framework-import.m index 858f1f57fbd15..cd923c1dbe0fb 100644 --- a/clang/test/VFS/framework-import.m +++ b/clang/test/VFS/framework-import.m @@ -1,4 +1,4 @@ -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml // RUN: %clang_cc1 -Werror -F %t -ivfsoverlay 
%t.yaml -fsyntax-only %s #import diff --git a/clang/test/VFS/implicit-include.c b/clang/test/VFS/implicit-include.c index 654e0a87de0e7..06bff4b962dbc 100644 --- a/clang/test/VFS/implicit-include.c +++ b/clang/test/VFS/implicit-include.c @@ -1,4 +1,4 @@ -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml // RUN: %clang_cc1 -Werror -ivfsoverlay %t.yaml -I %t -include "not_real.h" -fsyntax-only %s void foo() { diff --git a/clang/test/VFS/include-mixed-real-and-virtual.c b/clang/test/VFS/include-mixed-real-and-virtual.c index e4297c5737d95..b46ee9af99905 100644 --- a/clang/test/VFS/include-mixed-real-and-virtual.c +++ b/clang/test/VFS/include-mixed-real-and-virtual.c @@ -1,7 +1,7 @@ // RUN: rm -rf %t // RUN: mkdir -p %t // RUN: echo "void baz(void);" > %t/real.h -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml // RUN: %clang_cc1 -Werror -ivfsoverlay %t.yaml -I %t -fsyntax-only %s #include "not_real.h" diff --git a/clang/test/VFS/include-real-from-virtual.c b/clang/test/VFS/include-real-from-virtual.c index 3a41c4ea2c767..7398be735c5fe 100644 --- a/clang/test/VFS/include-real-from-virtual.c +++ b/clang/test/VFS/include-real-from-virtual.c @@ -1,7 +1,7 @@ // RUN: rm -rf %t // RUN: mkdir -p %t // RUN: echo "void baz(void);" > %t/real.h -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml // RUN: %clang_cc1 -Werror -ivfsoverlay %t.yaml -I %t -fsyntax-only %s #include "include_real.h" diff --git a/clang/test/VFS/include-virtual-from-real.c b/clang/test/VFS/include-virtual-from-real.c index 0b0d4cd0025a5..b50d5b7292532 100644 --- a/clang/test/VFS/include-virtual-from-real.c +++ b/clang/test/VFS/include-virtual-from-real.c @@ -1,7 +1,7 @@ // RUN: rm -rf %t // RUN: mkdir -p %t // RUN: echo '#include "not_real.h"' > %t/include_not_real.h -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml // RUN: %clang_cc1 -Werror -ivfsoverlay %t.yaml -I %t -fsyntax-only %s #include "include_not_real.h" diff --git a/clang/test/VFS/include.c b/clang/test/VFS/include.c index 16a1bca71a720..a55e73a38178f 100644 --- a/clang/test/VFS/include.c +++ b/clang/test/VFS/include.c @@ -1,4 +1,4 @@ -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml // RUN: %clang_cc1 -Werror -I %t -ivfsoverlay %t.yaml -fsyntax-only %s #include "not_real.h" diff --git a/clang/test/VFS/incomplete-umbrella.m b/clang/test/VFS/incomplete-umbrella.m index 5b2a1e0b4e1b1..196313927bc08 100644 --- a/clang/test/VFS/incomplete-umbrella.m +++ b/clang/test/VFS/incomplete-umbrella.m @@ -1,7 +1,7 @@ // RUN: rm -rf %t // RUN: mkdir -p %t/Incomplete.framework/Headers // RUN: echo '// IncompleteReal.h' > 
%t/Incomplete.framework/Headers/IncompleteReal.h -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml // RUN: not %clang_cc1 -Werror -fmodules -fimplicit-module-maps -fmodules-cache-path=%t \ // RUN: -ivfsoverlay %t.yaml -F %t -fsyntax-only %s 2>&1 | FileCheck %s diff --git a/clang/test/VFS/module-import.m b/clang/test/VFS/module-import.m index 336a72d31cfa6..25d37bbf0a77b 100644 --- a/clang/test/VFS/module-import.m +++ b/clang/test/VFS/module-import.m @@ -1,5 +1,5 @@ // RUN: rm -rf %t -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml // RUN: %clang_cc1 -Werror -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ivfsoverlay %t.yaml -I %t -fsyntax-only %s @import not_real; @@ -16,7 +16,7 @@ void foo() { #endif // Override the module map (vfsoverlay2 on top) -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay2.yaml > %t2.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay2.yaml > %t2.yaml // RUN: %clang_cc1 -Werror -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ivfsoverlay %t.yaml -ivfsoverlay %t2.yaml -I %t -fsyntax-only %s // vfsoverlay2 not present diff --git a/clang/test/VFS/module_missing_vfs.m b/clang/test/VFS/module_missing_vfs.m index 6285ac0649278..3cd8fc2c9eed0 100644 --- a/clang/test/VFS/module_missing_vfs.m +++ b/clang/test/VFS/module_missing_vfs.m @@ -5,7 +5,7 @@ // ERROR: virtual filesystem overlay file '{{.*}}' not found // RUN: find %t/mcp -name "A-*.pcm" | count 1 -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/MissingVFS/vfsoverlay.yaml > %t/vfs.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/MissingVFS/vfsoverlay.yaml > %t/vfs.yaml // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/mcp -I %S/Inputs/MissingVFS %s -fsyntax-only -ivfsoverlay %t/vfs.yaml // RUN: find %t/mcp -name "A-*.pcm" | count 1 diff --git a/clang/test/VFS/real-path-found-first.m b/clang/test/VFS/real-path-found-first.m index 8d7d21bf7832e..0d9a6de589fd7 100644 --- a/clang/test/VFS/real-path-found-first.m +++ b/clang/test/VFS/real-path-found-first.m @@ -7,7 +7,7 @@ // RUN: rm -rf %t %t-cache %t.pch // RUN: mkdir -p %t/SomeFramework.framework/Modules // RUN: cat %S/Inputs/some_frame_module.map > %t/SomeFramework.framework/Modules/module.modulemap -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml // Build // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t-cache -F %t \ diff --git a/clang/test/VFS/relative-path.c b/clang/test/VFS/relative-path.c index fc4ae151d87f7..24313affc69d8 100644 --- a/clang/test/VFS/relative-path.c +++ b/clang/test/VFS/relative-path.c @@ -1,6 +1,6 @@ // RUN: mkdir -p %t // RUN: cd %t -// RUN: sed -e "s@INPUT_DIR@%/S/Inputs@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsoverlay.yaml > %t.yaml +// RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e 
"s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml // RUN: %clang_cc1 -Werror -I . -ivfsoverlay %t.yaml -fsyntax-only %s #include "not_real.h" diff --git a/clang/test/VFS/test_nonmodular.c b/clang/test/VFS/test_nonmodular.c index dbc1f622f2032..faec0e3a51623 100644 --- a/clang/test/VFS/test_nonmodular.c +++ b/clang/test/VFS/test_nonmodular.c @@ -3,7 +3,7 @@ // We can't have module.map inside Inputs/Nonmodular. // RUN: cp %S/Inputs/Nonmodular/Nonmodular.modulemap %t/outdir/module.modulemap // -// RUN: sed -e "s@VDIR@%/t/vdir@g" -e "s@IN_DIR@%/S@g" -e "s@OUT_DIR@%/t/outdir@g" %S/Inputs/Nonmodular/nonmodular-headers.yaml > %t/vdir/nonmodular-headers.yaml +// RUN: sed -e "s@VDIR@%{/t:regex_replacement}/vdir@g" -e "s@IN_DIR@%{/S:regex_replacement}@g" -e "s@OUT_DIR@%{/t:regex_replacement}/outdir@g" %S/Inputs/Nonmodular/nonmodular-headers.yaml > %t/vdir/nonmodular-headers.yaml // RUN: %clang_cc1 -fmodule-name=Nonmodular -fmodules -Wnon-modular-include-in-framework-module -verify -fimplicit-module-maps -fmodules-cache-path=%t/cache -ivfsoverlay %t/vdir/nonmodular-headers.yaml -I %S/Inputs -F %t/vdir -fsyntax-only %S/Inputs/Nonmodular/test.c // expected-no-diagnostics diff --git a/clang/test/VFS/umbrella-framework-import-skipnonexist.m b/clang/test/VFS/umbrella-framework-import-skipnonexist.m index 6f536b40a9113..a778e26af162f 100644 --- a/clang/test/VFS/umbrella-framework-import-skipnonexist.m +++ b/clang/test/VFS/umbrella-framework-import-skipnonexist.m @@ -4,7 +4,7 @@ // RUN: mkdir -p %t/vdir %t/outdir %t/cache // RUN: cp -R %S/Inputs/Bar.framework %t/outdir/ // -// RUN: sed -e "s@VDIR@%/t/vdir@g" -e "s@OUT_DIR@%/t/outdir@g" %S/Inputs/bar-headers.yaml > %t/vdir/bar-headers.yaml +// RUN: sed -e "s@VDIR@%{/t:regex_replacement}/vdir@g" -e "s@OUT_DIR@%{/t:regex_replacement}/outdir@g" %S/Inputs/bar-headers.yaml > %t/vdir/bar-headers.yaml // RUN: rm -f %t/outdir/Bar.framework/Headers/B.h // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -ivfsoverlay %t/vdir/bar-headers.yaml -F %t/vdir -fsyntax-only %s diff --git a/clang/test/VFS/vfsroot-include.c b/clang/test/VFS/vfsroot-include.c index 2f3ff78bd6e95..2564004ea4b1f 100644 --- a/clang/test/VFS/vfsroot-include.c +++ b/clang/test/VFS/vfsroot-include.c @@ -3,7 +3,7 @@ // RUN: rm -rf %t // RUN: mkdir -p %t -// RUN: sed -e "s@TEST_DIR@%/S@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsroot.yaml > %t.yaml +// RUN: sed -e "s@TEST_DIR@%{/S:regex_replacement}@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsroot.yaml > %t.yaml // RUN: not %clang_cc1 -Werror -ivfsoverlay %t.yaml -I %S/Inputs -I /direct-vfs-root-files -fsyntax-only /tests/vfsroot-include.c 2>&1 | FileCheck %s // The line above tests that the compiler input file is looked up through VFS. 
diff --git a/clang/test/VFS/vfsroot-module.m b/clang/test/VFS/vfsroot-module.m index 979c5c2819773..3ad3e19d4b37f 100644 --- a/clang/test/VFS/vfsroot-module.m +++ b/clang/test/VFS/vfsroot-module.m @@ -3,7 +3,7 @@ // RUN: rm -rf %t // RUN: mkdir -p %t -// RUN: sed -e "s@TEST_DIR@%/S@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsroot.yaml > %t.yaml +// RUN: sed -e "s@TEST_DIR@%{/S:regex_replacement}@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsroot.yaml > %t.yaml // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/cache -ivfsoverlay %t.yaml -F %S/Inputs -fsyntax-only /tests/vfsroot-module.m // Test that a file missing from the VFS root is not found, even if it is diff --git a/clang/test/VFS/vfsroot-with-overlay.c b/clang/test/VFS/vfsroot-with-overlay.c index 04a275ed15805..4a2c64cb8734b 100644 --- a/clang/test/VFS/vfsroot-with-overlay.c +++ b/clang/test/VFS/vfsroot-with-overlay.c @@ -3,7 +3,7 @@ // RUN: rm -rf %t // RUN: mkdir -p %t -// RUN: sed -e "s@TEST_DIR@%/S@g" -e "s@OUT_DIR@%/t@g" %S/Inputs/vfsroot.yaml > %t.yaml +// RUN: sed -e "s@TEST_DIR@%{/S:regex_replacement}@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsroot.yaml > %t.yaml // RUN: sed -e "s@INPUT_DIR@/indirect-vfs-root-files@g" -e "s@OUT_DIR@/overlay-dir@g" %S/Inputs/vfsoverlay.yaml > %t/vfsoverlay.yaml // RUN: %clang_cc1 -Werror -ivfsoverlay %t.yaml -ivfsoverlay /direct-vfs-root-files/vfsoverlay.yaml -I /overlay-dir -fsyntax-only /tests/vfsroot-with-overlay.c diff --git a/clang/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs b/clang/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs index 7443405efad27..26a0af3b55b50 100644 --- a/clang/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs +++ b/clang/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs @@ -24,6 +24,7 @@ using System.Runtime.InteropServices; using System.Xml.Linq; using System.Linq; +using System.Text; namespace LLVM.ClangFormat { @@ -292,8 +293,7 @@ private void FormatSelection(OptionPageGrid options) string text = view.TextBuffer.CurrentSnapshot.GetText(); int start = view.Selection.Start.Position.GetContainingLine().Start.Position; int end = view.Selection.End.Position.GetContainingLine().End.Position; - int length = end - start; - + // clang-format doesn't support formatting a range that starts at the end // of the file. 
if (start >= text.Length && text.Length > 0) @@ -301,7 +301,7 @@ private void FormatSelection(OptionPageGrid options) string path = Vsix.GetDocumentParent(view); string filePath = Vsix.GetDocumentPath(view); - RunClangFormatAndApplyReplacements(text, start, length, path, filePath, options, view); + RunClangFormatAndApplyReplacements(text, start, end, path, filePath, options, view); } /// @@ -336,11 +336,11 @@ private void FormatView(IWpfTextView view, OptionPageGrid options) RunClangFormatAndApplyReplacements(text, 0, text.Length, path, filePath, options, view); } - private void RunClangFormatAndApplyReplacements(string text, int offset, int length, string path, string filePath, OptionPageGrid options, IWpfTextView view) + private void RunClangFormatAndApplyReplacements(string text, int start, int end, string path, string filePath, OptionPageGrid options, IWpfTextView view) { try { - string replacements = RunClangFormat(text, offset, length, path, filePath, options); + string replacements = RunClangFormat(text, start, end, path, filePath, options); ApplyClangFormatReplacements(replacements, view); } catch (Exception e) @@ -363,9 +363,9 @@ private void RunClangFormatAndApplyReplacements(string text, int offset, int len /// <summary> /// Runs the given text through clang-format and returns the replacements as XML. /// - /// Formats the text range starting at offset of the given length. + /// Formats the character range [start, end) of the given text. /// </summary> - private static string RunClangFormat(string text, int offset, int length, string path, string filePath, OptionPageGrid options) + private static string RunClangFormat(string text, int start, int end, string path, string filePath, OptionPageGrid options) { string vsixPath = Path.GetDirectoryName( typeof(ClangFormatPackage).Assembly.Location); @@ -373,6 +373,9 @@ private static string RunClangFormat(string text, int offset, int length, string System.Diagnostics.Process process = new System.Diagnostics.Process(); process.StartInfo.UseShellExecute = false; process.StartInfo.FileName = vsixPath + "\\clang-format.exe"; + char[] chars = text.ToCharArray(); + int offset = Encoding.UTF8.GetByteCount(chars, 0, start); + int length = Encoding.UTF8.GetByteCount(chars, 0, end) - offset; // Poor man's escaping - this will not work when quotes are already escaped // in the input (but we don't need more). string style = options.Style.Replace("\"", "\\\""); @@ -413,10 +416,11 @@ private static string RunClangFormat(string text, int offset, int length, string // 2. We write everything to the standard output - this cannot block, as clang-format // reads the full standard input before analyzing it without writing anything to the // standard output. - process.StandardInput.Write(text); + StreamWriter utf8Writer = new StreamWriter(process.StandardInput.BaseStream, new UTF8Encoding(false)); + utf8Writer.Write(text); // 3. We notify clang-format that the input is done - after this point clang-format // will start analyzing the input and eventually write the output. - process.StandardInput.Close(); + utf8Writer.Close(); // 4. We must read clang-format's output before waiting for it to exit; clang-format // will close the channel by exiting.
string output = process.StandardOutput.ReadToEnd(); @@ -440,13 +444,18 @@ private static void ApplyClangFormatReplacements(string replacements, IWpfTextVi if (replacements.Length == 0) return; + string text = view.TextBuffer.CurrentSnapshot.GetText(); + byte[] bytes = Encoding.UTF8.GetBytes(text); + var root = XElement.Parse(replacements); var edit = view.TextBuffer.CreateEdit(); foreach (XElement replacement in root.Descendants("replacement")) { + int offset = int.Parse(replacement.Attribute("offset").Value); + int length = int.Parse(replacement.Attribute("length").Value); var span = new Span( - int.Parse(replacement.Attribute("offset").Value), - int.Parse(replacement.Attribute("length").Value)); + Encoding.UTF8.GetCharCount(bytes, 0, offset), + Encoding.UTF8.GetCharCount(bytes, offset, length)); edit.Replace(span, replacement.Value); } edit.Apply(); diff --git a/clang/tools/clang-format-vs/README.txt b/clang/tools/clang-format-vs/README.txt index 84e0b451f018d..2cac5b9af9e3c 100644 --- a/clang/tools/clang-format-vs/README.txt +++ b/clang/tools/clang-format-vs/README.txt @@ -10,12 +10,12 @@ the following CMake vars: - BUILD_CLANG_FORMAT_VS_PLUGIN=ON -- NUGET_EXE_PATH=path/to/nuget_dir (unless nuget.exe is already available in PATH) +- NUGET_EXE_DIR=path/to/nuget_dir (unless nuget.exe is already available in PATH) example: cd /d C:\code\llvm mkdir build & cd build - cmake -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON -DNUGET_EXE_PATH=C:\nuget .. + cmake -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON -DNUGET_EXE_DIR=C:\nuget .. Once LLVM.sln is generated, build the clang_format_vsix target, which will build ClangFormat.sln, the C# extension application. diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp index 9e4f32da884fe..efafed1063910 100644 --- a/clang/tools/driver/cc1_main.cpp +++ b/clang/tools/driver/cc1_main.cpp @@ -218,7 +218,7 @@ int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { if (Clang->getFrontendOpts().TimeTrace) { llvm::timeTraceProfilerInitialize( - Clang->getFrontendOpts().TimeTraceGranularity); + Clang->getFrontendOpts().TimeTraceGranularity, Argv0); } // --print-supported-cpus takes priority over the actual compilation. if (Clang->getFrontendOpts().PrintSupportedCPUs) diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 2078e47195226..a8222356db44a 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -3595,6 +3595,7 @@ enum CXErrorCode clang_parseTranslationUnit2( const char *const *command_line_args, int num_command_line_args, struct CXUnsavedFile *unsaved_files, unsigned num_unsaved_files, unsigned options, CXTranslationUnit *out_TU) { + noteBottomOfStack(); SmallVector Args; Args.push_back("clang"); Args.append(command_line_args, command_line_args + num_command_line_args); @@ -3619,6 +3620,7 @@ enum CXErrorCode clang_parseTranslationUnit2FullArgv( CXErrorCode result = CXError_Failure; auto ParseTranslationUnitImpl = [=, &result] { + noteBottomOfStack(); result = clang_parseTranslationUnit_Impl( CIdx, source_filename, command_line_args, num_command_line_args, llvm::makeArrayRef(unsaved_files, num_unsaved_files), options, out_TU); @@ -6622,9 +6624,10 @@ void clang_enableStackTraces(void) { void clang_executeOnThread(void (*fn)(void*), void *user_data, unsigned stack_size) { - llvm::llvm_execute_on_thread( - fn, user_data, - stack_size == 0 ? llvm::None : llvm::Optional(stack_size)); + llvm::llvm_execute_on_thread(fn, user_data, + stack_size == 0 + ? 
clang::DesiredStackSize + : llvm::Optional(stack_size)); } //===----------------------------------------------------------------------===// diff --git a/clang/unittests/AST/SourceLocationTest.cpp b/clang/unittests/AST/SourceLocationTest.cpp index 6b4dddc3850a9..d104497974f18 100644 --- a/clang/unittests/AST/SourceLocationTest.cpp +++ b/clang/unittests/AST/SourceLocationTest.cpp @@ -648,6 +648,112 @@ TEST(FunctionDecl, FunctionDeclWithNoExceptSpecification) { Language::Lang_CXX11)); } +class FunctionDeclParametersRangeVerifier : public RangeVerifier { +protected: + SourceRange getRange(const FunctionDecl &Function) override { + return Function.getParametersSourceRange(); + } +}; + +TEST(FunctionDeclParameters, FunctionDeclOnlyVariadic) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(1, 8, 1, 8); + EXPECT_TRUE(Verifier.match("void f(...);\n", functionDecl())); +} + +TEST(FunctionDeclParameters, FunctionDeclVariadic) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(1, 8, 1, 15); + EXPECT_TRUE(Verifier.match("void f(int a, ...);\n", functionDecl())); +} + +TEST(FunctionDeclParameters, FunctionDeclMacroVariadic) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(2, 8, 1, 18); + EXPECT_TRUE(Verifier.match("#define VARIADIC ...\n" + "void f(int a, VARIADIC);\n", + functionDecl())); +} + +TEST(FunctionDeclParameters, FunctionDeclMacroParams) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(1, 16, 2, 20); + EXPECT_TRUE(Verifier.match("#define PARAMS int a, int b\n" + "void f(PARAMS, int c);", + functionDecl())); +} + +TEST(FunctionDeclParameters, FunctionDeclSingleParameter) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(1, 8, 1, 12); + EXPECT_TRUE(Verifier.match("void f(int a);\n", functionDecl())); +} + +TEST(FunctionDeclParameters, MemberFunctionDecl) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(2, 8, 2, 12); + EXPECT_TRUE(Verifier.match("class A{\n" + "void f(int a);\n" + "};", + functionDecl())); +} + +TEST(FunctionDeclParameters, MemberFunctionDeclVariadic) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(2, 8, 2, 15); + EXPECT_TRUE(Verifier.match("class A{\n" + "void f(int a, ...);\n" + "};", + functionDecl())); +} + +TEST(FunctionDeclParameters, StaticFunctionDecl) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(2, 15, 2, 19); + EXPECT_TRUE(Verifier.match("class A{\n" + "static void f(int a);\n" + "};", + functionDecl())); +} + +TEST(FunctionDeclParameters, FunctionDeclMultipleParameters) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(1, 8, 1, 28); + EXPECT_TRUE( + Verifier.match("void f(int a, int b, char *c);\n", functionDecl())); +} + +TEST(FunctionDeclParameters, FunctionDeclWithDefaultValue) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(1, 8, 1, 16); + EXPECT_TRUE(Verifier.match("void f(int a = 5);\n", functionDecl())); +} + +TEST(FunctionDeclParameters, FunctionDeclWithVolatile) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(1, 8, 1, 22); + EXPECT_TRUE(Verifier.match("void f(volatile int *i);", functionDecl())); +} + +TEST(FunctionDeclParameters, FunctionDeclWithConstParam) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(1, 8, 1, 19); + EXPECT_TRUE(Verifier.match("void f(const int *i);", functionDecl())); +} + +TEST(FunctionDeclParameters, FunctionDeclWithConstVolatileParam) { + 
FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(1, 8, 1, 28); + EXPECT_TRUE(Verifier.match("void f(const volatile int *i);", functionDecl())); +} + +TEST(FunctionDeclParameters, FunctionDeclWithParamAttribute) { + FunctionDeclParametersRangeVerifier Verifier; + Verifier.expectRange(1, 8, 1, 36); + EXPECT_TRUE(Verifier.match("void f(__attribute__((unused)) int a) {}", + functionDecl())); +} + TEST(CXXMethodDecl, CXXMethodDeclWithThrowSpecification) { RangeVerifier Verifier; Verifier.expectRange(2, 1, 2, 16); diff --git a/clang/unittests/Driver/DistroTest.cpp b/clang/unittests/Driver/DistroTest.cpp index d0c86d1c54c9e..391c0baaadf5c 100644 --- a/clang/unittests/Driver/DistroTest.cpp +++ b/clang/unittests/Driver/DistroTest.cpp @@ -44,7 +44,7 @@ TEST(DistroTest, DetectUbuntu) { "SUPPORT_URL=\"http://help.ubuntu.com/\"\n" "BUG_REPORT_URL=\"http://bugs.launchpad.net/ubuntu/\"\n")); - Distro UbuntuTrusty{UbuntuTrustyFileSystem}; + Distro UbuntuTrusty{UbuntuTrustyFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::UbuntuTrusty), UbuntuTrusty); ASSERT_TRUE(UbuntuTrusty.IsUbuntu()); ASSERT_FALSE(UbuntuTrusty.IsRedhat()); @@ -52,6 +52,9 @@ TEST(DistroTest, DetectUbuntu) { ASSERT_FALSE(UbuntuTrusty.IsDebian()); ASSERT_FALSE(UbuntuTrusty.IsGentoo()); + Distro UbuntuTrusty2{UbuntuTrustyFileSystem, llvm::Triple("unknown-pc-windows")}; + ASSERT_EQ(Distro(Distro::UnknownDistro), UbuntuTrusty2); + llvm::vfs::InMemoryFileSystem UbuntuYakketyFileSystem; UbuntuYakketyFileSystem.addFile("/etc/debian_version", 0, llvm::MemoryBuffer::getMemBuffer("stretch/sid\n")); @@ -74,7 +77,7 @@ TEST(DistroTest, DetectUbuntu) { "VERSION_CODENAME=yakkety\n" "UBUNTU_CODENAME=yakkety\n")); - Distro UbuntuYakkety{UbuntuYakketyFileSystem}; + Distro UbuntuYakkety{UbuntuYakketyFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::UbuntuYakkety), UbuntuYakkety); ASSERT_TRUE(UbuntuYakkety.IsUbuntu()); ASSERT_FALSE(UbuntuYakkety.IsRedhat()); @@ -109,7 +112,7 @@ TEST(DistroTest, DetectRedhat) { "REDHAT_SUPPORT_PRODUCT=\"Fedora\"\n" "REDHAT_SUPPORT_PRODUCT_VERSION=25\n" "PRIVACY_POLICY_URL=https://fedoraproject.org/wiki/Legal:PrivacyPolicy\n")); - Distro Fedora25{Fedora25FileSystem}; + Distro Fedora25{Fedora25FileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::Fedora), Fedora25); ASSERT_FALSE(Fedora25.IsUbuntu()); ASSERT_TRUE(Fedora25.IsRedhat()); @@ -146,7 +149,7 @@ TEST(DistroTest, DetectRedhat) { "REDHAT_SUPPORT_PRODUCT=\"centos\"\n" "REDHAT_SUPPORT_PRODUCT_VERSION=\"7\"\n")); - Distro CentOS7{CentOS7FileSystem}; + Distro CentOS7{CentOS7FileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::RHEL7), CentOS7); ASSERT_FALSE(CentOS7.IsUbuntu()); ASSERT_TRUE(CentOS7.IsRedhat()); @@ -174,7 +177,7 @@ TEST(DistroTest, DetectOpenSUSE) { "HOME_URL=\"https://opensuse.org/\"\n" "ID_LIKE=\"suse\"\n")); - Distro OpenSUSELeap421{OpenSUSELeap421FileSystem}; + Distro OpenSUSELeap421{OpenSUSELeap421FileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::OpenSUSE), OpenSUSELeap421); ASSERT_FALSE(OpenSUSELeap421.IsUbuntu()); ASSERT_FALSE(OpenSUSELeap421.IsRedhat()); @@ -200,7 +203,7 @@ TEST(DistroTest, DetectOpenSUSE) { "HOME_URL=\"https://opensuse.org/\"\n" "ID_LIKE=\"suse\"\n")); - Distro OpenSUSE132{OpenSUSE132FileSystem}; + Distro OpenSUSE132{OpenSUSE132FileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::OpenSUSE), OpenSUSE132); ASSERT_FALSE(OpenSUSE132.IsUbuntu()); ASSERT_FALSE(OpenSUSE132.IsRedhat()); @@ -217,7 +220,7 
@@ TEST(DistroTest, DetectOpenSUSE) { llvm::MemoryBuffer::getMemBuffer("LSB_VERSION=\"core-2.0-noarch:core-3.0-noarch:core-2.0-x86_64:core-3.0-x86_64\"\n")); // SLES10 is unsupported and therefore evaluates to unknown - Distro SLES10{SLES10FileSystem}; + Distro SLES10{SLES10FileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::UnknownDistro), SLES10); ASSERT_FALSE(SLES10.IsUbuntu()); ASSERT_FALSE(SLES10.IsRedhat()); @@ -240,7 +243,7 @@ TEST(DistroTest, DetectDebian) { "SUPPORT_URL=\"http://www.debian.org/support\"\n" "BUG_REPORT_URL=\"https://bugs.debian.org/\"\n")); - Distro DebianJessie{DebianJessieFileSystem}; + Distro DebianJessie{DebianJessieFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::DebianJessie), DebianJessie); ASSERT_FALSE(DebianJessie.IsUbuntu()); ASSERT_FALSE(DebianJessie.IsRedhat()); @@ -259,7 +262,7 @@ TEST(DistroTest, DetectDebian) { "SUPPORT_URL=\"http://www.debian.org/support\"\n" "BUG_REPORT_URL=\"https://bugs.debian.org/\"\n")); - Distro DebianStretchSid{DebianStretchSidFileSystem}; + Distro DebianStretchSid{DebianStretchSidFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::DebianStretch), DebianStretchSid); ASSERT_FALSE(DebianStretchSid.IsUbuntu()); ASSERT_FALSE(DebianStretchSid.IsRedhat()); @@ -281,7 +284,7 @@ TEST(DistroTest, DetectExherbo) { "SUPPORT_URL=\"irc://irc.freenode.net/#exherbo\"\n" "BUG_REPORT_URL=\"https://bugs.exherbo.org/\"\n")); - Distro Exherbo{ExherboFileSystem}; + Distro Exherbo{ExherboFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::Exherbo), Exherbo); ASSERT_FALSE(Exherbo.IsUbuntu()); ASSERT_FALSE(Exherbo.IsRedhat()); @@ -303,7 +306,7 @@ TEST(DistroTest, DetectArchLinux) { "SUPPORT_URL=\"https://bbs.archlinux.org/\"\n" "BUG_REPORT_URL=\"https://bugs.archlinux.org/\"\n")); - Distro ArchLinux{ArchLinuxFileSystem}; + Distro ArchLinux{ArchLinuxFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::ArchLinux), ArchLinux); ASSERT_FALSE(ArchLinux.IsUbuntu()); ASSERT_FALSE(ArchLinux.IsRedhat()); @@ -328,7 +331,7 @@ TEST(DistroTest, DetectGentoo) { "SUPPORT_URL=\"https://www.gentoo.org/support/\"\n" "BUG_REPORT_URL=\"https://bugs.gentoo.org/\"\n")); - Distro Gentoo{GentooFileSystem}; + Distro Gentoo{GentooFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::Gentoo), Gentoo); ASSERT_FALSE(Gentoo.IsUbuntu()); ASSERT_FALSE(Gentoo.IsRedhat()); @@ -337,4 +340,57 @@ TEST(DistroTest, DetectGentoo) { ASSERT_TRUE(Gentoo.IsGentoo()); } +TEST(DistroTest, DetectWindowsAndCrossCompile) { + + class CountingFileSystem : public llvm::vfs::ProxyFileSystem { + public: + CountingFileSystem() : ProxyFileSystem(llvm::vfs::getRealFileSystem()) {} + + llvm::ErrorOr<llvm::vfs::Status> status(const llvm::Twine &Path) override { + ++Count; + return llvm::vfs::ProxyFileSystem::status(Path); + } + + llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> + openFileForRead(const llvm::Twine &Path) override { + ++Count; + return llvm::vfs::ProxyFileSystem::openFileForRead(Path); + } + + unsigned Count{}; + }; + + llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> RFS = + llvm::vfs::getRealFileSystem(); + llvm::Triple Host(llvm::sys::getProcessTriple()); + + CountingFileSystem CFileSystem; + Distro LinuxDistro{CFileSystem, llvm::Triple("unknown-pc-linux")}; + if (Host.isOSWindows()) { + ASSERT_EQ(Distro(Distro::UnknownDistro), LinuxDistro); + ASSERT_GT(CFileSystem.Count, 0U); + } + + Distro WinDistro{CFileSystem, llvm::Triple("unknown-pc-windows")}; + ASSERT_EQ(Distro(Distro::UnknownDistro), WinDistro); + ASSERT_GT(CFileSystem.Count, 0U); + + //
When running on Windows along with a real file system, ensure that no + // distro is returned if targeting Linux + if (Host.isOSWindows()) { + Distro LinuxRealDistro{*RFS, llvm::Triple("unknown-pc-linux")}; + ASSERT_EQ(Distro(Distro::UnknownDistro), LinuxRealDistro); + } + // When running on Linux, check if the distro is the same as the host when + // targeting Linux + if (Host.isOSLinux()) { + Distro HostDistro{*RFS, Host}; + Distro LinuxRealDistro{*RFS, llvm::Triple("unknown-pc-linux")}; + ASSERT_EQ(HostDistro, LinuxRealDistro); + } + + Distro WinRealDistro{*RFS, llvm::Triple("unknown-pc-windows")}; + ASSERT_EQ(Distro(Distro::UnknownDistro), WinRealDistro); +} + } // end anonymous namespace diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index d89ad44e4577f..069542683c0d9 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -6990,6 +6990,9 @@ TEST_F(FormatTest, UnderstandsUnaryOperators) { verifyFormat("int a = /* confusing comment */ -1;"); // FIXME: The space after 'i' is wrong, but hopefully, this is a rare case. verifyFormat("int a = i /* confusing comment */++;"); + + verifyFormat("co_yield -1;"); + verifyFormat("co_return -1;"); } TEST_F(FormatTest, DoesNotIndentRelativeToUnaryOperators) { @@ -12552,6 +12555,7 @@ TEST_F(FormatTest, ParsesConfigurationBools) { CHECK_PARSE_BOOL(SpacesInParentheses); CHECK_PARSE_BOOL(SpacesInSquareBrackets); CHECK_PARSE_BOOL(SpacesInAngles); + CHECK_PARSE_BOOL(SpacesInConditionalStatement); CHECK_PARSE_BOOL(SpaceInEmptyBlock); CHECK_PARSE_BOOL(SpaceInEmptyParentheses); CHECK_PARSE_BOOL(SpacesInContainerLiterals); @@ -14877,6 +14881,22 @@ TEST_F(FormatTest, AmbersandInLamda) { verifyFormat("auto lambda = [&a = a]() { a = 2; };", AlignStyle); } + TEST_F(FormatTest, SpacesInConditionalStatement) { + FormatStyle Spaces = getLLVMStyle(); + Spaces.SpacesInConditionalStatement = true; + verifyFormat("for ( int i = 0; i; i++ )\n continue;", Spaces); + verifyFormat("if ( !a )\n return;", Spaces); + verifyFormat("if ( a )\n return;", Spaces); + verifyFormat("if constexpr ( a )\n return;", Spaces); + verifyFormat("switch ( a )\ncase 1:\n return;", Spaces); + verifyFormat("while ( a )\n return;", Spaces); + verifyFormat("while ( (a && b) )\n return;", Spaces); + verifyFormat("do {\n} while ( 1 != 0 );", Spaces); + // Check that space on the left of "::" is inserted as expected at beginning + // of condition. + verifyFormat("while ( ::func() )\n return;", Spaces); +} + TEST_F(FormatTest, AlternativeOperators) { // Test case for ensuring alternate operators are not // combined with their right most neighbour. 
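The FormatTest hunk above exercises the new SpacesInConditionalStatement style option, which pads the inside of the parentheses of if/for/while/switch conditions. A quick way to try the option end to end, sketched in Python and assuming a clang-format binary built with this patch is on PATH (the inline-YAML -style syntax is standard clang-format; the rest of the snippet is mine):

    import subprocess

    code = "if(x&&y){return;}"
    style = "{BasedOnStyle: LLVM, SpacesInConditionalStatement: true}"
    result = subprocess.run(
        ["clang-format", "-style=" + style, "-assume-filename=test.cpp"],
        input=code, capture_output=True, text=True, check=True)
    # Per the tests above, the condition comes back as "if ( x && y )".
    print(result.stdout)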
diff --git a/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp b/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp index ed44cd86b3e42..b5bba30db78da 100644 --- a/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp @@ -328,12 +328,17 @@ TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIfdef) { SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n" + "void skip();\n" "#elif B\n" "#elif C\n" "#else D\n" "#endif\n", Out)); - EXPECT_STREQ("", Out.data()); + EXPECT_STREQ("#ifdef A\n" + "#elif B\n" + "#elif C\n" + "#endif\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, Pragma) { @@ -507,6 +512,12 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) { for (auto Source : { "#warning \\\n#include \n", "#error \\\n#include \n", + }) { + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("", Out.data()); + } + + for (auto Source : { "#if MACRO\n#warning '\n#endif\n", "#if MACRO\n#warning \"\n#endif\n", "#if MACRO\n#warning /*\n#endif\n", @@ -515,7 +526,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) { "#if MACRO\n#error /*\n#endif\n", }) { ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("", Out.data()); + EXPECT_STREQ("#if MACRO\n#endif\n", Out.data()); } } @@ -543,7 +554,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixL) { #include )"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#include \n", Out.data()); + EXPECT_STREQ("#if DEBUG\n#endif\n#include \n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixU) { diff --git a/clang/unittests/Tooling/CompilationDatabaseTest.cpp b/clang/unittests/Tooling/CompilationDatabaseTest.cpp index 87727fe7c9079..91685c0d0c736 100644 --- a/clang/unittests/Tooling/CompilationDatabaseTest.cpp +++ b/clang/unittests/Tooling/CompilationDatabaseTest.cpp @@ -859,5 +859,35 @@ TEST_F(TargetAndModeTest, TargetAndMode) { "clang++ --driver-mode=g++ bar.cpp -D bar.cpp"); } +class ExpandResponseFilesTest : public MemDBTest { +public: + ExpandResponseFilesTest() : FS(new llvm::vfs::InMemoryFileSystem) {} + +protected: + void addFile(StringRef File, StringRef Content) { + ASSERT_TRUE( + FS->addFile(File, 0, llvm::MemoryBuffer::getMemBufferCopy(Content))); + } + + std::string getCommand(llvm::StringRef F) { + auto Results = expandResponseFiles(std::make_unique(Entries), FS) + ->getCompileCommands(path(F)); + if (Results.empty()) + return "none"; + return llvm::join(Results[0].CommandLine, " "); + } + + llvm::IntrusiveRefCntPtr FS; +}; + +TEST_F(ExpandResponseFilesTest, ExpandResponseFiles) { + addFile(path(StringRef("rsp1.rsp")), "-Dflag"); + + add("foo.cpp", "clang", "@rsp1.rsp"); + add("bar.cpp", "clang", "-Dflag"); + EXPECT_EQ(getCommand("foo.cpp"), "clang foo.cpp -D foo.cpp -Dflag"); + EXPECT_EQ(getCommand("bar.cpp"), "clang bar.cpp -D bar.cpp -Dflag"); +} + } // end namespace tooling } // end namespace clang diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp index 6ffe2c43dd0ff..2c462d49ee410 100644 --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/Annotations.h" #include "llvm/Testing/Support/SupportHelpers.h" +#include 
"gmock/gmock.h" #include #include #include @@ -663,6 +664,20 @@ TEST_F(TokenBufferTest, SpelledByExpanded) { ValueIs(SameRange(findSpelled("not_mapped")))); } +TEST_F(TokenBufferTest, ExpandedTokensForRange) { + recordTokens(R"cpp( + #define SIGN(X) X##_washere + A SIGN(B) C SIGN(D) E SIGN(F) G + )cpp"); + + SourceRange R(findExpanded("C").front().location(), + findExpanded("F_washere").front().location()); + // Sanity check: expanded and spelled tokens are stored separately. + EXPECT_THAT(Buffer.expandedTokens(R), + SameRange(findExpanded("C D_washere E F_washere"))); + EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); +} + TEST_F(TokenBufferTest, ExpansionStartingAt) { // Object-like macro expansions. recordTokens(R"cpp( diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp index c8be48b1361d0..3d30a074ddd82 100644 --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -130,7 +130,7 @@ void foo() {} )cpp", R"txt( *: TranslationUnit -|-TopLevelDeclaration +|-SimpleDeclaration | |-int | |-main | |-( @@ -138,7 +138,7 @@ void foo() {} | `-CompoundStatement | |-{ | `-} -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-foo |-( @@ -157,7 +157,7 @@ int main() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-int |-main |-( @@ -202,7 +202,7 @@ void test() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -224,7 +224,7 @@ void test() { {"void test() { int a = 10; }", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -232,16 +232,18 @@ void test() { `-CompoundStatement |-{ |-DeclarationStatement - | |-int - | |-a - | |-= - | |-10 + | |-SimpleDeclaration + | | |-int + | | |-a + | | |-= + | | `-UnknownExpression + | | `-10 | `-; `-} )txt"}, {"void test() { ; }", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -263,7 +265,7 @@ void test() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -299,7 +301,7 @@ void test() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -329,7 +331,7 @@ int test() { return 1; } )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-int |-test |-( @@ -352,7 +354,7 @@ void test() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -360,18 +362,21 @@ void test() { `-CompoundStatement |-{ |-DeclarationStatement - | |-int - | |-a - | |-[ - | |-3 - | |-] + | |-SimpleDeclaration + | | |-int + | | |-a + | | |-[ + | | |-UnknownExpression + | | | `-3 + | | `-] | `-; |-RangeBasedForStatement | |-for | |-( - | |-int - | |-x - | |-: + | |-SimpleDeclaration + | | |-int + | | |-x + | | `-: | |-UnknownExpression | | `-a | |-) @@ -384,7 +389,7 @@ void test() { // counterpart. {"void main() { foo: return 100; }", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-main |-( @@ -411,7 +416,7 @@ void test() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -444,7 +449,70 @@ void test() { | | `-) | `-; `-} -)txt"}}; +)txt"}, + // Multiple declarators group into a single SimpleDeclaration. 
+ {R"cpp( + int *a, b; + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-int + |-* + |-a + |-, + |-b + `-; + )txt"}, + {R"cpp( + typedef int *a, b; + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-typedef + |-int + |-* + |-a + |-, + |-b + `-; + )txt"}, + // Multiple declarators inside a statement. + {R"cpp( +void foo() { + int *a, b; + typedef int *ta, tb; +} + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-void + |-foo + |-( + |-) + `-CompoundStatement + |-{ + |-DeclarationStatement + | |-SimpleDeclaration + | | |-int + | | |-* + | | |-a + | | |-, + | | `-b + | `-; + |-DeclarationStatement + | |-SimpleDeclaration + | | |-typedef + | | |-int + | | |-* + | | |-ta + | | |-, + | | `-tb + | `-; + `-} + )txt"}}; for (const auto &T : Cases) { auto *Root = buildTree(T.first); diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index 1ca3b5a3f2249..422188a5f3dd4 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -1208,14 +1208,16 @@ Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType, Result::Ptr V = std::make_shared(ArgNum, isa(ArgType)); - if (const auto *ST = dyn_cast(ArgType)) { - if (Promote && ST->isInteger() && ST->sizeInBits() < 32) + if (Promote) { + if (const auto *ST = dyn_cast(ArgType)) { + if (ST->isInteger() && ST->sizeInBits() < 32) + V = std::make_shared(getScalarType("u32"), V); + } else if (const auto *PT = dyn_cast(ArgType)) { V = std::make_shared(getScalarType("u32"), V); - } else if (const auto *PT = dyn_cast(ArgType)) { - V = std::make_shared(getScalarType("u32"), V); - V = std::make_shared("arm_mve_pred_i2v", - std::vector{PT}, - std::vector{V}); + V = std::make_shared("arm_mve_pred_i2v", + std::vector{PT}, + std::vector{V}); + } } return V; diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index cdf761b00c61c..a0f3fb2ddc089 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -161,11 +161,11 @@ class Type { Pointer(false), ScalarForMangling(false), NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} - Type(TypeSpec TS, char CharMod) + Type(TypeSpec TS, StringRef CharMods) : TS(std::move(TS)), Kind(Void), Immediate(false), Constant(false), Pointer(false), ScalarForMangling(false), NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { - applyModifier(CharMod); + applyModifiers(CharMods); } /// Returns a type representing "void". 
@@ -181,13 +181,15 @@ class Type { bool noManglingQ() const { return NoManglingQ; } bool isPointer() const { return Pointer; } + bool isValue() const { return !isVoid() && !isPointer(); } + bool isScalar() const { return isValue() && NumVectors == 0; } + bool isVector() const { return isValue() && NumVectors > 0; } + bool isConstPointer() const { return Constant; } bool isFloating() const { return Kind == Float; } bool isInteger() const { return Kind == SInt || Kind == UInt; } bool isPoly() const { return Kind == Poly; } bool isSigned() const { return Kind == SInt; } bool isImmediate() const { return Immediate; } - bool isScalar() const { return NumVectors == 0; } - bool isVector() const { return NumVectors > 0; } bool isFloat() const { return isFloating() && ElementBitwidth == 32; } bool isDouble() const { return isFloating() && ElementBitwidth == 64; } bool isHalf() const { return isFloating() && ElementBitwidth == 16; } @@ -205,11 +207,11 @@ class Type { // Mutator functions // void makeUnsigned() { - assert(isInteger() && "not a potentially signed type"); + assert(!isVoid() && "not a potentially signed type"); Kind = UInt; } void makeSigned() { - assert(isInteger() && "not a potentially signed type"); + assert(!isVoid() && "not a potentially signed type"); Kind = SInt; } @@ -267,8 +269,8 @@ class Type { /// seen. This is needed by applyModifier as some modifiers /// only take effect if the type size was changed by "Q" or "H". void applyTypespec(bool &Quad); - /// Applies a prototype modifiers to the type. - void applyModifier(char Mod); + /// Applies prototype modifiers to the type. + void applyModifiers(StringRef Mods); }; //===----------------------------------------------------------------------===// @@ -299,8 +301,8 @@ class Intrinsic { /// The Record this intrinsic was created from. Record *R; - /// The unmangled name and prototype. - std::string Name, Proto; + /// The unmangled name. + std::string Name; /// The input and output typespecs. InTS == OutTS except when /// CartesianProductOfTypes is 1 - this is the case for vreinterpret. TypeSpec OutTS, InTS; @@ -323,6 +325,8 @@ class Intrinsic { /// The types of return value [0] and parameters [1..]. std::vector Types; + /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls. + int PolymorphicKeyType; /// The local variables defined. std::map Variables; /// NeededEarly - set if any other intrinsic depends on this intrinsic. @@ -358,34 +362,39 @@ class Intrinsic { Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, StringRef Guard, bool IsUnavailable, bool BigEndianSafe) - : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS), - CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable), - BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false), - BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) { - // If this builtin takes an immediate argument, we need to #define it rather - // than use a standard declaration, so that SemaChecking can range check - // the immediate passed by the user. - if (Proto.find('i') != std::string::npos) - UseMacro = true; - - // Pointer arguments need to use macros to avoid hiding aligned attributes - // from the pointer type. 
- if (Proto.find('p') != std::string::npos || - Proto.find('c') != std::string::npos) - UseMacro = true; - - // It is not permitted to pass or return an __fp16 by value, so intrinsics - // taking a scalar float16_t must be implemented as macros. - if (OutTS.find('h') != std::string::npos && - Proto.find('s') != std::string::npos) - UseMacro = true; - + : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), + Guard(Guard.str()), IsUnavailable(IsUnavailable), + BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false), + UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."), + Emitter(Emitter) { // Modify the TypeSpec per-argument to get a concrete Type, and create // known variables for each. // Types[0] is the return value. - Types.emplace_back(OutTS, Proto[0]); - for (unsigned I = 1; I < Proto.size(); ++I) - Types.emplace_back(InTS, Proto[I]); + unsigned Pos = 0; + Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); + StringRef Mods = getNextModifiers(Proto, Pos); + while (!Mods.empty()) { + Types.emplace_back(InTS, Mods); + if (Mods.find("!") != StringRef::npos) + PolymorphicKeyType = Types.size() - 1; + + Mods = getNextModifiers(Proto, Pos); + } + + for (auto Type : Types) { + // If this builtin takes an immediate argument, we need to #define it rather + // than use a standard declaration, so that SemaChecking can range check + // the immediate passed by the user. + + // Pointer arguments need to use macros to avoid hiding aligned attributes + // from the pointer type. + + // It is not permitted to pass or return an __fp16 by value, so intrinsics + // taking a scalar float16_t must be implemented as macros. + if (Type.isImmediate() || Type.isPointer() || + (Type.isScalar() && Type.isHalf())) + UseMacro = true; + } } /// Get the Record that this intrinsic is based off. @@ -401,23 +410,24 @@ class Intrinsic { /// Return true if the intrinsic takes an immediate operand. bool hasImmediate() const { - return Proto.find('i') != std::string::npos; + return std::any_of(Types.begin(), Types.end(), + [](const Type &T) { return T.isImmediate(); }); } /// Return the parameter index of the immediate operand. unsigned getImmediateIdx() const { - assert(hasImmediate()); - unsigned Idx = Proto.find('i'); - assert(Idx > 0 && "Can't return an immediate!"); - return Idx - 1; + for (unsigned Idx = 0; Idx < Types.size(); ++Idx) + if (Types[Idx].isImmediate()) + return Idx - 1; + llvm_unreachable("Intrinsic has no immediate"); } - unsigned getNumParams() const { return Proto.size() - 1; } + + unsigned getNumParams() const { return Types.size() - 1; } Type getReturnType() const { return Types[0]; } Type getParamType(unsigned I) const { return Types[I + 1]; } Type getBaseType() const { return BaseType; } - /// Return the raw prototype string. - std::string getProto() const { return Proto; } + Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } /// Return true if the prototype has a scalar argument. bool protoHasScalar() const; @@ -471,6 +481,8 @@ class Intrinsic { void indexBody(); private: + StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const; + std::string mangleName(std::string Name, ClassKind CK) const; void initVariables(); @@ -614,10 +626,14 @@ std::string Type::builtin_str() const { if (isVoid()) return "v"; - if (Pointer) + if (isPointer()) { // All pointers are void pointers. 
- S += "v"; - else if (isInteger()) + S = "v"; + if (isConstPointer()) + S += "C"; + S += "*"; + return S; + } else if (isInteger()) switch (ElementBitwidth) { case 8: S += "c"; break; case 16: S += "s"; break; @@ -634,10 +650,11 @@ std::string Type::builtin_str() const { default: llvm_unreachable("Unhandled case!"); } + // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? if (isChar() && !isPointer() && isSigned()) // Make chars explicitly signed. S = "S" + S; - else if (!isPointer() && isInteger() && !isSigned()) + else if (isInteger() && !isSigned()) S = "U" + S; // Constant indices are "int", but have the "constant expression" modifier. @@ -646,11 +663,8 @@ std::string Type::builtin_str() const { S = "I" + S; } - if (isScalar()) { - if (Constant) S += "C"; - if (Pointer) S += "*"; + if (isScalar()) return S; - } std::string Ret; for (unsigned I = 0; I < NumVectors; ++I) @@ -812,202 +826,77 @@ void Type::applyTypespec(bool &Quad) { Bitwidth = Quad ? 128 : 64; } -void Type::applyModifier(char Mod) { +void Type::applyModifiers(StringRef Mods) { bool AppliedQuad = false; applyTypespec(AppliedQuad); - switch (Mod) { - case 'v': - Kind = Void; - break; - case 't': - if (isPoly()) + for (char Mod : Mods) { + switch (Mod) { + case '.': + break; + case 'v': + Kind = Void; + break; + case 'S': + Kind = SInt; + break; + case 'U': Kind = UInt; - break; - case 'b': - Kind = UInt; - NumVectors = 0; - Bitwidth = ElementBitwidth; - break; - case '$': - Kind = SInt; - NumVectors = 0; - Bitwidth = ElementBitwidth; - break; - case 'u': - Kind = UInt; - break; - case 'x': - assert(!isPoly() && "'u' can't be used with poly types!"); - Kind = SInt; - break; - case 'o': - Bitwidth = ElementBitwidth = 64; - NumVectors = 0; - Kind = Float; - break; - case 'y': - Bitwidth = ElementBitwidth = 32; - NumVectors = 0; - Kind = Float; - break; - case 'Y': - Bitwidth = ElementBitwidth = 16; - NumVectors = 0; - Kind = Float; - break; - case 'I': - Bitwidth = ElementBitwidth = 32; - NumVectors = 0; - Kind = SInt; - break; - case 'L': - Bitwidth = ElementBitwidth = 64; - NumVectors = 0; - Kind = SInt; - break; - case 'U': - Bitwidth = ElementBitwidth = 32; - NumVectors = 0; - Kind = UInt; - break; - case 'O': - Bitwidth = ElementBitwidth = 64; - NumVectors = 0; - Kind = UInt; - break; - case 'f': - Kind = Float; - ElementBitwidth = 32; - break; - case 'F': - Kind = Float; - ElementBitwidth = 64; - break; - case 'H': - Kind = Float; - ElementBitwidth = 16; - break; - case '0': - Kind = Float; - if (AppliedQuad) - Bitwidth /= 2; - ElementBitwidth = 16; - break; - case '1': - Kind = Float; - if (!AppliedQuad) - Bitwidth *= 2; - ElementBitwidth = 16; - break; - case 'g': - if (AppliedQuad) - Bitwidth /= 2; - break; - case 'j': - if (!AppliedQuad) - Bitwidth *= 2; - break; - case 'w': - ElementBitwidth *= 2; - Bitwidth *= 2; - break; - case 'n': - ElementBitwidth *= 2; - break; - case 'i': - Kind = SInt; - ElementBitwidth = Bitwidth = 32; - NumVectors = 0; - Immediate = true; - break; - case 'l': - Kind = UInt; - ElementBitwidth = Bitwidth = 64; - NumVectors = 0; - Immediate = true; - break; - case 'z': - ElementBitwidth /= 2; - Bitwidth = ElementBitwidth; - NumVectors = 0; - break; - case 'r': - ElementBitwidth *= 2; - Bitwidth = ElementBitwidth; - NumVectors = 0; - break; - case 's': - Bitwidth = ElementBitwidth; - NumVectors = 0; - break; - case 'k': - Bitwidth *= 2; - break; - case 'c': - Constant = true; - LLVM_FALLTHROUGH; - case 'p': - Pointer = true; - Bitwidth = ElementBitwidth; 
- NumVectors = 0; - break; - case 'h': - ElementBitwidth /= 2; - break; - case 'q': - ElementBitwidth /= 2; - Bitwidth *= 2; - break; - case 'e': - ElementBitwidth /= 2; - Kind = UInt; - break; - case 'm': - ElementBitwidth /= 2; - Bitwidth /= 2; - break; - case 'd': - break; - case '2': - NumVectors = 2; - break; - case '3': - NumVectors = 3; - break; - case '4': - NumVectors = 4; - break; - case 'B': - NumVectors = 2; - if (!AppliedQuad) - Bitwidth *= 2; - break; - case 'C': - NumVectors = 3; - if (!AppliedQuad) - Bitwidth *= 2; - break; - case 'D': - NumVectors = 4; - if (!AppliedQuad) - Bitwidth *= 2; - break; - case '7': - if (AppliedQuad) - Bitwidth /= 2; - ElementBitwidth = 8; - break; - case '8': - ElementBitwidth = 8; - break; - case '9': - if (!AppliedQuad) - Bitwidth *= 2; - ElementBitwidth = 8; - break; - default: - llvm_unreachable("Unhandled character!"); + break; + case 'F': + Kind = Float; + break; + case 'P': + Kind = Poly; + break; + case '>': + assert(ElementBitwidth < 128); + ElementBitwidth *= 2; + break; + case '<': + assert(ElementBitwidth > 8); + ElementBitwidth /= 2; + break; + case '1': + NumVectors = 0; + break; + case '2': + NumVectors = 2; + break; + case '3': + NumVectors = 3; + break; + case '4': + NumVectors = 4; + break; + case '*': + Pointer = true; + break; + case 'c': + Constant = true; + break; + case 'Q': + Bitwidth = 128; + break; + case 'q': + Bitwidth = 64; + break; + case 'I': + Kind = SInt; + ElementBitwidth = Bitwidth = 32; + NumVectors = 0; + Immediate = true; + break; + case 'p': + if (isPoly()) + Kind = UInt; + break; + case '!': + // Key type, handled elsewhere. + break; + default: + llvm_unreachable("Unhandled character!"); + } } } @@ -1015,6 +904,19 @@ void Type::applyModifier(char Mod) { // Intrinsic implementation //===----------------------------------------------------------------------===// +StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { + if (Proto.size() == Pos) + return StringRef(); + else if (Proto[Pos] != '(') + return Proto.substr(Pos++, 1); + + size_t Start = Pos + 1; + size_t End = Proto.find(')', Start); + assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); + Pos = End + 1; + return Proto.slice(Start, End); +} + std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { char typeCode = '\0'; bool printNumber = true; @@ -1053,17 +955,13 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { return S; } -static bool isFloatingPointProtoModifier(char Mod) { - return Mod == 'F' || Mod == 'f' || Mod == 'H' || Mod == 'Y' || Mod == 'I'; -} - std::string Intrinsic::getBuiltinTypeStr() { ClassKind LocalCK = getClassKind(true); std::string S; Type RetT = getReturnType(); if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && - !RetT.isFloating() && !RetT.isVoid()) + !RetT.isFloating()) RetT.makeInteger(RetT.getElementSizeInBits(), false); // Since the return value must be one type, return a vector type of the @@ -1078,7 +976,7 @@ std::string Intrinsic::getBuiltinTypeStr() { if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) RetT.makeSigned(); - if (LocalCK == ClassB && !RetT.isVoid() && !RetT.isScalar()) + if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) // Cast to vector of 8-bit elements. RetT.makeInteger(8, true); @@ -1194,7 +1092,7 @@ void Intrinsic::initVariables() { // Modify the TypeSpec per-argument to get a concrete Type, and create // known variables for each. 
- for (unsigned I = 1; I < Proto.size(); ++I) {
+ for (unsigned I = 1; I < Types.size(); ++I) {
 char NameC = '0' + (I - 1);
 std::string Name = "p";
 Name.push_back(NameC);
@@ -1315,7 +1213,7 @@ void Intrinsic::emitShadowedArgs() {
 for (unsigned I = 0; I < getNumParams(); ++I) {
 // Do not create a temporary for an immediate argument.
 // That would defeat the whole point of using a macro!
- if (hasImmediate() && Proto[I+1] == 'i')
+ if (getParamType(I).isImmediate())
 continue;
 // Do not create a temporary for pointer arguments. The input
 // pointer may have an alignment hint.
@@ -1339,13 +1237,9 @@ void Intrinsic::emitShadowedArgs() {
 }

 bool Intrinsic::protoHasScalar() const {
- return (Proto.find('s') != std::string::npos ||
- Proto.find('z') != std::string::npos ||
- Proto.find('r') != std::string::npos ||
- Proto.find('b') != std::string::npos ||
- Proto.find('$') != std::string::npos ||
- Proto.find('y') != std::string::npos ||
- Proto.find('o') != std::string::npos);
+ return std::any_of(Types.begin(), Types.end(), [](const Type &T) {
+ return T.isScalar() && !T.isImmediate();
+ });
 }

 void Intrinsic::emitBodyAsBuiltinCall() {
@@ -1408,13 +1302,7 @@ void Intrinsic::emitBodyAsBuiltinCall() {

 // Extra constant integer to hold type class enum for this function, e.g. s8
 if (getClassKind(true) == ClassB) {
- Type ThisTy = getReturnType();
- if (Proto[0] == 'v' || isFloatingPointProtoModifier(Proto[0]))
- ThisTy = getParamType(0);
- if (ThisTy.isPointer())
- ThisTy = getParamType(1);
-
- S += utostr(ThisTy.getNeonEnum());
+ S += utostr(getPolymorphicKeyType().getNeonEnum());
 } else {
 // Remove extraneous ", ".
 S.pop_back();
@@ -2019,9 +1907,9 @@ void NeonEmitter::createIntrinsic(Record *R,
 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
 for (auto TS : TypeSpecs) {
 if (CartesianProductOfTypes) {
- Type DefaultT(TS, 'd');
+ Type DefaultT(TS, ".");
 for (auto SrcTS : TypeSpecs) {
- Type DefaultSrcT(SrcTS, 'd');
+ Type DefaultSrcT(SrcTS, ".");
 if (TS == SrcTS ||
 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
 continue;
@@ -2101,31 +1989,19 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
 continue;

 uint64_t Mask = 0ULL;
- Type Ty = Def->getReturnType();
- if (Def->getProto()[0] == 'v' ||
- isFloatingPointProtoModifier(Def->getProto()[0]))
- Ty = Def->getParamType(0);
- if (Ty.isPointer())
- Ty = Def->getParamType(1);
-
- Mask |= 1ULL << Ty.getNeonEnum();
+ Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();

 // Check if the function has a pointer or const pointer argument.
- std::string Proto = Def->getProto();
 int PtrArgNum = -1;
 bool HasConstPtr = false;
 for (unsigned I = 0; I < Def->getNumParams(); ++I) {
- char ArgType = Proto[I + 1];
- if (ArgType == 'c') {
- HasConstPtr = true;
+ const auto &Type = Def->getParamType(I);
+ if (Type.isPointer()) {
 PtrArgNum = I;
- break;
- }
- if (ArgType == 'p') {
- PtrArgNum = I;
- break;
+ HasConstPtr = Type.isConstPointer();
 }
 }
+ // For sret builtins, adjust the pointer argument index.
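+ // (Builtins returning more than one vector do so through a hidden leading
+ // pointer parameter, which shifts every user-visible pointer argument by one.)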
if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) PtrArgNum += 1; @@ -2349,7 +2225,7 @@ void NeonEmitter::run(raw_ostream &OS) { bool InIfdef = false; for (auto &TS : TDTypeVec) { bool IsA64 = false; - Type T(TS, 'd'); + Type T(TS, "."); if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) IsA64 = true; @@ -2382,7 +2258,7 @@ void NeonEmitter::run(raw_ostream &OS) { for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { for (auto &TS : TDTypeVec) { bool IsA64 = false; - Type T(TS, 'd'); + Type T(TS, "."); if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) IsA64 = true; @@ -2395,8 +2271,8 @@ void NeonEmitter::run(raw_ostream &OS) { InIfdef = true; } - char M = '2' + (NumMembers - 2); - Type VT(TS, M); + const char Mods[] = { static_cast('2' + (NumMembers - 2)), 0}; + Type VT(TS, Mods); OS << "typedef struct " << VT.str() << " {\n"; OS << " " << T.str() << " val"; OS << "[" << NumMembers << "]"; diff --git a/clang/utils/convert_arm_neon.py b/clang/utils/convert_arm_neon.py new file mode 100644 index 0000000000000..c4b3645294573 --- /dev/null +++ b/clang/utils/convert_arm_neon.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 + +# This script was committed on 20/11/2019 and it would probably make sense to remove +# it after the next release branches. + +# This script is pipe based and converts an arm_neon.td (or arm_fp16.td) file +# using the old single-char type modifiers to an equivalent new-style form where +# each modifier is orthogonal and they can be composed. +# +# It was used to directly generate the .td files on master, so if you have any +# local additions I would suggest implementing any modifiers here, and running +# it over your entire pre-merge .td files rather than trying to resolve any +# conflicts manually. + +import re, sys +MOD_MAP = { + 'v': 'v', + 'x': 'S', + 'u': 'U', + 'd': '.', + 'g': 'q', + 'j': 'Q', + 'w': '>Q', + 'n': '>', + 'h': '<', + 'q': '', + 's': '1', + 'z': '1<', + 'r': '1>', + 'b': '1U', + '$': '1S', + 'k': 'Q', + '2': '2', + '3': '3', + '4': '4', + 'B': '2Q', + 'C': '3Q', + 'D': '4Q', + 'p': '*', + 'c': 'c*', + '7': '< desired: + res += '<' + cur /= 2 + return res + + +def remap_protocol(proto, typespec, name): + key_type = 0 + + # Conversions like to see the integer type so they know signedness. + if 'vcvt' in name and '_f' in name and name != 'vcvt_f32_f64' and name != 'vcvt_f64_f32': + key_type = 1 + default_width = typespec_elt_size(typespec) + inconsistent_width = False + for elt in typespec: + new_width = typespec_elt_size(elt) + if new_width and new_width != default_width: + inconsistent_width = True + + res = '' + for i, c in enumerate(proto): + # void and pointers make for bad discriminators in CGBuiltin.cpp. + if c in 'vcp': + key_type += 1 + + if c in MOD_MAP: + cur_mod = MOD_MAP[c] + elif inconsistent_width: + # Otherwise it's a fixed output width modifier. 
+ sys.stderr.write(f'warning: {name} uses fixed output size but has inconsistent input widths: {proto} {typespec}\n')
+
+ if c == 'Y':
+ # Y: scalar of half float
+ resize = get_resize(default_width, 16)
+ cur_mod = f'1F{resize}'
+ elif c == 'y':
+ # y: scalar of float
+ resize = get_resize(default_width, 32)
+ cur_mod = f'1F{resize}'
+ elif c == 'o':
+ # o: scalar of double
+ resize = get_resize(default_width, 64)
+ cur_mod = f'1F{resize}'
+ elif c == 'I':
+ # I: scalar of 32-bit signed
+ resize = get_resize(default_width, 32)
+ cur_mod = f'1S{resize}'
+ elif c == 'L':
+ # L: scalar of 64-bit signed
+ resize = get_resize(default_width, 64)
+ cur_mod = f'1S{resize}'
+ elif c == 'U':
+ # U: scalar of 32-bit unsigned
+ resize = get_resize(default_width, 32)
+ cur_mod = f'1U{resize}'
+ elif c == 'O':
+ # O: scalar of 64-bit unsigned
+ resize = get_resize(default_width, 64)
+ cur_mod = f'1U{resize}'
+ elif c == 'f':
+ # f: float (int args)
+ resize = get_resize(default_width, 32)
+ cur_mod = f'F{resize}'
+ elif c == 'F':
+ # F: double (int args)
+ resize = get_resize(default_width, 64)
+ cur_mod = f'F{resize}'
+ elif c == 'H':
+ # H: half (int args)
+ resize = get_resize(default_width, 16)
+ cur_mod = f'F{resize}'
+ elif c == '0':
+ # 0: half (int args), ignore 'Q' size modifier.
+ resize = get_resize(default_width, 16)
+ cur_mod = f'Fq{resize}'
+ elif c == '1':
+ # 1: half (int args), force 'Q' size modifier.
+ resize = get_resize(default_width, 16)
+ cur_mod = f'FQ{resize}'
+
+ if len(cur_mod) == 0:
+ raise Exception(f'WTF: {c} in {name}')
+
+ if key_type != 0 and key_type == i:
+ cur_mod += '!'
+
+ if len(cur_mod) == 1:
+ res += cur_mod
+ else:
+ res += '(' + cur_mod + ')'
+
+ return res
+
+def replace_insts(m):
+ start, end = m.span('proto')
+ start -= m.start()
+ end -= m.start()
+ new_proto = remap_protocol(m['proto'], m['kinds'], m['name'])
+ return m.group()[:start] + new_proto + m.group()[end:]
+
+INST = re.compile(r'Inst<"(?P<name>.*?)",\s*"(?P<proto>.*?)",\s*"(?P<kinds>.*?)"')
+
+new_td = INST.sub(replace_insts, sys.stdin.read())
+sys.stdout.write(new_td)
diff --git a/compiler-rt/include/fuzzer/FuzzedDataProvider.h b/compiler-rt/include/fuzzer/FuzzedDataProvider.h
index fd895b767d9e6..3e069eba69b46 100644
--- a/compiler-rt/include/fuzzer/FuzzedDataProvider.h
+++ b/compiler-rt/include/fuzzer/FuzzedDataProvider.h
@@ -263,6 +263,12 @@ class FuzzedDataProvider {
 // which seems to be a natural choice for other implementations as well.
 // To increase the odds even more, we also call |shrink_to_fit| below.
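 // An empty vector's data() pointer may be null, and memcpy from a null
 // pointer is undefined behavior even for zero-length copies, hence the
 // early return added below.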
 std::vector<T> result(size);
+ if (size == 0) {
+ if (num_bytes_to_consume != 0)
+ abort();
+ return result;
+ }
+
 std::memcpy(result.data(), data_ptr_, num_bytes_to_consume);
 Advance(num_bytes_to_consume);
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 1a63aad0e8f66..feacd21d0865b 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -241,6 +241,13 @@ set(x86_ARCH_SOURCES
 powixf2.c
 )

+if (NOT MSVC)
+ set(x86_ARCH_SOURCES
+ ${x86_ARCH_SOURCES}
+ i386/fp_mode.c
+ )
+endif ()
+
 if (NOT MSVC)
 set(x86_64_SOURCES
 ${GENERIC_TF_SOURCES}
diff --git a/compiler-rt/lib/builtins/i386/fp_mode.c b/compiler-rt/lib/builtins/i386/fp_mode.c
new file mode 100644
index 0000000000000..62ab771222c09
--- /dev/null
+++ b/compiler-rt/lib/builtins/i386/fp_mode.c
@@ -0,0 +1,39 @@
+//===----- lib/i386/fp_mode.c - Floating-point mode utilities ----*- C -*-====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../fp_mode.h"
+
+#define X87_TONEAREST 0x0000
+#define X87_DOWNWARD 0x0400
+#define X87_UPWARD 0x0800
+#define X87_TOWARDZERO 0x0c00
+#define X87_RMODE_MASK (X87_TONEAREST | X87_UPWARD | X87_DOWNWARD | X87_TOWARDZERO)
+
+FE_ROUND_MODE __fe_getround() {
+ // Assume that the rounding mode state for the fpu agrees with the SSE unit.
+ unsigned short cw;
+ __asm__ __volatile__ ("fnstcw %0" : "=m" (cw));
+
+ switch (cw & X87_RMODE_MASK) {
+ case X87_TONEAREST:
+ return FE_TONEAREST;
+ case X87_DOWNWARD:
+ return FE_DOWNWARD;
+ case X87_UPWARD:
+ return FE_UPWARD;
+ case X87_TOWARDZERO:
+ return FE_TOWARDZERO;
+ }
+ return FE_TONEAREST;
+}
+
+int __fe_raise_inexact() {
+ float f = 1.0f, g = 3.0f;
+ __asm__ __volatile__ ("fdivs %1" : "+t" (f) : "m" (g));
+ return 0;
+}
diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp
index 606139f2e1787..5df8c0ac91063 100644
--- a/compiler-rt/lib/hwasan/hwasan_report.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_report.cpp
@@ -371,12 +371,13 @@ static void PrintTagInfoAroundAddr(tag_t *tag_ptr, uptr num_rows,
 InternalScopedString s(GetPageSizeCached() * 8);
 for (tag_t *row = beg_row; row < end_row; row += row_len) {
 s.append("%s", row == center_row_beg ? "=>" : "  ");
+ s.append("%p:", row);
 for (uptr i = 0; i < row_len; i++) {
 s.append("%s", row + i == tag_ptr ? "[" : " ");
 print_tag(s, &row[i]);
 s.append("%s", row + i == tag_ptr ? "]" : " ");
 }
- s.append("%s\n", row == center_row_beg ? "<=" : "  ");
+ s.append("\n");
 }
 Printf("%s", s.data());
}
diff --git a/compiler-rt/lib/profile/InstrProfilingUtil.c b/compiler-rt/lib/profile/InstrProfilingUtil.c
index 13301f341fc5a..bf5a9670fe18c 100644
--- a/compiler-rt/lib/profile/InstrProfilingUtil.c
+++ b/compiler-rt/lib/profile/InstrProfilingUtil.c
@@ -207,8 +207,9 @@ COMPILER_RT_VISIBILITY FILE *lprofOpenFileEx(const char *ProfileName) {
 f = fdopen(fd, "r+b");
#elif defined(_WIN32)
 // FIXME: Use the wide variants to handle Unicode filenames.
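 // The share flags below let concurrent processes open the profile file
 // while this one holds it; mutual exclusion is instead provided by the
 // file lock acquired after opening.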
- HANDLE h = CreateFileA(ProfileName, GENERIC_READ | GENERIC_WRITE, 0, 0, - OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0); + HANDLE h = CreateFileA(ProfileName, GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_ALWAYS, + FILE_ATTRIBUTE_NORMAL, 0); if (h == INVALID_HANDLE_VALUE) return NULL; @@ -218,6 +219,10 @@ COMPILER_RT_VISIBILITY FILE *lprofOpenFileEx(const char *ProfileName) { return NULL; } + if (lprofLockFd(fd) != 0) + PROF_WARN("Data may be corrupted during profile merging : %s\n", + "Fail to obtain file lock due to system limit."); + f = _fdopen(fd, "r+b"); if (f == 0) { CloseHandle(h); diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h index 166e19e2b8f28..1d00a5d76d04d 100644 --- a/compiler-rt/lib/scudo/standalone/allocator_config.h +++ b/compiler-rt/lib/scudo/standalone/allocator_config.h @@ -67,7 +67,7 @@ struct AndroidSvelteConfig { struct FuchsiaConfig { // 1GB Regions typedef SizeClassAllocator64 Primary; - typedef MapAllocator<> Secondary; + typedef MapAllocator<0U> Secondary; template using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs. }; diff --git a/compiler-rt/lib/scudo/standalone/atomic_helpers.h b/compiler-rt/lib/scudo/standalone/atomic_helpers.h index 47037d764e252..6c84ba86ed329 100644 --- a/compiler-rt/lib/scudo/standalone/atomic_helpers.h +++ b/compiler-rt/lib/scudo/standalone/atomic_helpers.h @@ -21,12 +21,12 @@ enum memory_order { memory_order_acq_rel = 4, memory_order_seq_cst = 5 }; -COMPILER_CHECK(memory_order_relaxed == __ATOMIC_RELAXED); -COMPILER_CHECK(memory_order_consume == __ATOMIC_CONSUME); -COMPILER_CHECK(memory_order_acquire == __ATOMIC_ACQUIRE); -COMPILER_CHECK(memory_order_release == __ATOMIC_RELEASE); -COMPILER_CHECK(memory_order_acq_rel == __ATOMIC_ACQ_REL); -COMPILER_CHECK(memory_order_seq_cst == __ATOMIC_SEQ_CST); +static_assert(memory_order_relaxed == __ATOMIC_RELAXED, ""); +static_assert(memory_order_consume == __ATOMIC_CONSUME, ""); +static_assert(memory_order_acquire == __ATOMIC_ACQUIRE, ""); +static_assert(memory_order_release == __ATOMIC_RELEASE, ""); +static_assert(memory_order_acq_rel == __ATOMIC_ACQ_REL, ""); +static_assert(memory_order_seq_cst == __ATOMIC_SEQ_CST, ""); struct atomic_u8 { typedef u8 Type; @@ -60,7 +60,7 @@ struct atomic_uptr { }; template -INLINE typename T::Type atomic_load(const volatile T *A, memory_order MO) { +inline typename T::Type atomic_load(const volatile T *A, memory_order MO) { DCHECK(!(reinterpret_cast(A) % sizeof(*A))); typename T::Type V; __atomic_load(&A->ValDoNotUse, &V, MO); @@ -68,29 +68,29 @@ INLINE typename T::Type atomic_load(const volatile T *A, memory_order MO) { } template -INLINE void atomic_store(volatile T *A, typename T::Type V, memory_order MO) { +inline void atomic_store(volatile T *A, typename T::Type V, memory_order MO) { DCHECK(!(reinterpret_cast(A) % sizeof(*A))); __atomic_store(&A->ValDoNotUse, &V, MO); } -INLINE void atomic_thread_fence(memory_order) { __sync_synchronize(); } +inline void atomic_thread_fence(memory_order) { __sync_synchronize(); } template -INLINE typename T::Type atomic_fetch_add(volatile T *A, typename T::Type V, +inline typename T::Type atomic_fetch_add(volatile T *A, typename T::Type V, memory_order MO) { DCHECK(!(reinterpret_cast(A) % sizeof(*A))); return __atomic_fetch_add(&A->ValDoNotUse, V, MO); } template -INLINE typename T::Type atomic_fetch_sub(volatile T *A, typename T::Type V, +inline typename T::Type atomic_fetch_sub(volatile T *A, typename T::Type V, 
memory_order MO) { DCHECK(!(reinterpret_cast(A) % sizeof(*A))); return __atomic_fetch_sub(&A->ValDoNotUse, V, MO); } template -INLINE typename T::Type atomic_exchange(volatile T *A, typename T::Type V, +inline typename T::Type atomic_exchange(volatile T *A, typename T::Type V, memory_order MO) { DCHECK(!(reinterpret_cast(A) % sizeof(*A))); typename T::Type R; @@ -99,7 +99,7 @@ INLINE typename T::Type atomic_exchange(volatile T *A, typename T::Type V, } template -INLINE bool atomic_compare_exchange_strong(volatile T *A, typename T::Type *Cmp, +inline bool atomic_compare_exchange_strong(volatile T *A, typename T::Type *Cmp, typename T::Type Xchg, memory_order MO) { return __atomic_compare_exchange(&A->ValDoNotUse, Cmp, &Xchg, false, MO, @@ -107,7 +107,7 @@ INLINE bool atomic_compare_exchange_strong(volatile T *A, typename T::Type *Cmp, } template -INLINE bool atomic_compare_exchange_weak(volatile T *A, typename T::Type *Cmp, +inline bool atomic_compare_exchange_weak(volatile T *A, typename T::Type *Cmp, typename T::Type Xchg, memory_order MO) { return __atomic_compare_exchange(&A->ValDoNotUse, Cmp, &Xchg, true, MO, @@ -117,17 +117,17 @@ INLINE bool atomic_compare_exchange_weak(volatile T *A, typename T::Type *Cmp, // Clutter-reducing helpers. template -INLINE typename T::Type atomic_load_relaxed(const volatile T *A) { +inline typename T::Type atomic_load_relaxed(const volatile T *A) { return atomic_load(A, memory_order_relaxed); } template -INLINE void atomic_store_relaxed(volatile T *A, typename T::Type V) { +inline void atomic_store_relaxed(volatile T *A, typename T::Type V) { atomic_store(A, V, memory_order_relaxed); } template -INLINE typename T::Type atomic_compare_exchange(volatile T *A, +inline typename T::Type atomic_compare_exchange(volatile T *A, typename T::Type Cmp, typename T::Type Xchg) { atomic_compare_exchange_strong(A, &Cmp, Xchg, memory_order_acquire); diff --git a/compiler-rt/lib/scudo/standalone/checksum.h b/compiler-rt/lib/scudo/standalone/checksum.h index 092342fd6efbd..a63b1b4f064d1 100644 --- a/compiler-rt/lib/scudo/standalone/checksum.h +++ b/compiler-rt/lib/scudo/standalone/checksum.h @@ -37,7 +37,7 @@ enum class Checksum : u8 { // significantly on memory accesses, as well as 1K of CRC32 table, on platforms // that do no support hardware CRC32. The checksum itself is 16-bit, which is at // odds with CRC32, but enough for our needs. -INLINE u16 computeBSDChecksum(u16 Sum, uptr Data) { +inline u16 computeBSDChecksum(u16 Sum, uptr Data) { for (u8 I = 0; I < sizeof(Data); I++) { Sum = static_cast((Sum >> 1) | ((Sum & 1) << 15)); Sum = static_cast(Sum + (Data & 0xff)); diff --git a/compiler-rt/lib/scudo/standalone/chunk.h b/compiler-rt/lib/scudo/standalone/chunk.h index 9ae75823ba778..f4d68b3ac6c4a 100644 --- a/compiler-rt/lib/scudo/standalone/chunk.h +++ b/compiler-rt/lib/scudo/standalone/chunk.h @@ -20,7 +20,7 @@ namespace scudo { extern Checksum HashAlgorithm; -INLINE u16 computeChecksum(u32 Seed, uptr Value, uptr *Array, uptr ArraySize) { +inline u16 computeChecksum(u32 Seed, uptr Value, uptr *Array, uptr ArraySize) { // If the hardware CRC32 feature is defined here, it was enabled everywhere, // as opposed to only for crc32_hw.cpp. 
This means that other hardware // specific instructions were likely emitted at other places, and as a result @@ -71,7 +71,7 @@ struct UnpackedHeader { uptr Checksum : 16; }; typedef atomic_u64 AtomicPackedHeader; -COMPILER_CHECK(sizeof(UnpackedHeader) == sizeof(PackedHeader)); +static_assert(sizeof(UnpackedHeader) == sizeof(PackedHeader), ""); // Those constants are required to silence some -Werror=conversion errors when // assigning values to the related bitfield variables. @@ -86,13 +86,12 @@ constexpr uptr getHeaderSize() { return roundUpTo(sizeof(PackedHeader), 1U << SCUDO_MIN_ALIGNMENT_LOG); } -INLINE AtomicPackedHeader *getAtomicHeader(void *Ptr) { +inline AtomicPackedHeader *getAtomicHeader(void *Ptr) { return reinterpret_cast(reinterpret_cast(Ptr) - getHeaderSize()); } -INLINE -const AtomicPackedHeader *getConstAtomicHeader(const void *Ptr) { +inline const AtomicPackedHeader *getConstAtomicHeader(const void *Ptr) { return reinterpret_cast( reinterpret_cast(Ptr) - getHeaderSize()); } @@ -100,7 +99,7 @@ const AtomicPackedHeader *getConstAtomicHeader(const void *Ptr) { // We do not need a cryptographically strong hash for the checksum, but a CRC // type function that can alert us in the event a header is invalid or // corrupted. Ideally slightly better than a simple xor of all fields. -static INLINE u16 computeHeaderChecksum(u32 Cookie, const void *Ptr, +static inline u16 computeHeaderChecksum(u32 Cookie, const void *Ptr, UnpackedHeader *Header) { UnpackedHeader ZeroChecksumHeader = *Header; ZeroChecksumHeader.Checksum = 0; @@ -110,7 +109,7 @@ static INLINE u16 computeHeaderChecksum(u32 Cookie, const void *Ptr, ARRAY_SIZE(HeaderHolder)); } -INLINE void storeHeader(u32 Cookie, void *Ptr, +inline void storeHeader(u32 Cookie, void *Ptr, UnpackedHeader *NewUnpackedHeader) { NewUnpackedHeader->Checksum = computeHeaderChecksum(Cookie, Ptr, NewUnpackedHeader); @@ -118,9 +117,8 @@ INLINE void storeHeader(u32 Cookie, void *Ptr, atomic_store_relaxed(getAtomicHeader(Ptr), NewPackedHeader); } -INLINE -void loadHeader(u32 Cookie, const void *Ptr, - UnpackedHeader *NewUnpackedHeader) { +inline void loadHeader(u32 Cookie, const void *Ptr, + UnpackedHeader *NewUnpackedHeader) { PackedHeader NewPackedHeader = atomic_load_relaxed(getConstAtomicHeader(Ptr)); *NewUnpackedHeader = bit_cast(NewPackedHeader); if (UNLIKELY(NewUnpackedHeader->Checksum != @@ -128,7 +126,7 @@ void loadHeader(u32 Cookie, const void *Ptr, reportHeaderCorruption(const_cast(Ptr)); } -INLINE void compareExchangeHeader(u32 Cookie, void *Ptr, +inline void compareExchangeHeader(u32 Cookie, void *Ptr, UnpackedHeader *NewUnpackedHeader, UnpackedHeader *OldUnpackedHeader) { NewUnpackedHeader->Checksum = @@ -141,8 +139,8 @@ INLINE void compareExchangeHeader(u32 Cookie, void *Ptr, reportHeaderRace(Ptr); } -INLINE -bool isValid(u32 Cookie, const void *Ptr, UnpackedHeader *NewUnpackedHeader) { +inline bool isValid(u32 Cookie, const void *Ptr, + UnpackedHeader *NewUnpackedHeader) { PackedHeader NewPackedHeader = atomic_load_relaxed(getConstAtomicHeader(Ptr)); *NewUnpackedHeader = bit_cast(NewPackedHeader); return NewUnpackedHeader->Checksum == diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index 8560c2d3599f3..b355a4746fae3 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -184,7 +184,7 @@ template class Allocator { ((Alignment > MinAlignment) ? 
Alignment : Chunk::getHeaderSize()); // Takes care of extravagantly large sizes as well as integer overflows. - COMPILER_CHECK(MaxAllowedMallocSize < UINTPTR_MAX - MaxAlignment); + static_assert(MaxAllowedMallocSize < UINTPTR_MAX - MaxAlignment, ""); if (UNLIKELY(Size >= MaxAllowedMallocSize)) { if (Options.MayReturnNull) return nullptr; @@ -457,6 +457,18 @@ template class Allocator { Stats.get(S); } + // Returns true if the pointer provided was allocated by the current + // allocator instance, which is compliant with tcmalloc's ownership concept. + // A corrupted chunk will not be reported as owned, which is WAI. + bool isOwned(const void *Ptr) { + initThreadMaybe(); + if (!Ptr || !isAligned(reinterpret_cast(Ptr), MinAlignment)) + return false; + Chunk::UnpackedHeader Header; + return Chunk::isValid(Cookie, Ptr, &Header) && + Header.State == Chunk::State::Allocated; + } + private: using SecondaryT = typename Params::Secondary; typedef typename PrimaryT::SizeClassMap SizeClassMap; @@ -468,6 +480,9 @@ template class Allocator { static const uptr MaxAllowedMallocSize = FIRST_32_SECOND_64(1UL << 31, 1ULL << 40); + static_assert(MinAlignment >= sizeof(Chunk::PackedHeader), + "Minimal alignment must at least cover a chunk header."); + // Constants used by the chunk iteration mechanism. static const u32 BlockMarker = 0x44554353U; static const uptr InvalidChunk = ~static_cast(0); @@ -523,7 +538,7 @@ template class Allocator { reportSanityCheckError("class ID"); } - static INLINE void *getBlockBegin(const void *Ptr, + static inline void *getBlockBegin(const void *Ptr, Chunk::UnpackedHeader *Header) { return reinterpret_cast( reinterpret_cast(Ptr) - Chunk::getHeaderSize() - @@ -531,7 +546,7 @@ template class Allocator { } // Return the size of a chunk as requested during its allocation. 
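 // For Primary-backed chunks the header stores that size directly; for
 // Secondary-backed chunks it stores the unused trailing byte count, so the
 // size is recomputed from the block end.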
- INLINE uptr getSize(const void *Ptr, Chunk::UnpackedHeader *Header) { + inline uptr getSize(const void *Ptr, Chunk::UnpackedHeader *Header) { const uptr SizeOrUnusedBytes = Header->SizeOrUnusedBytes; if (LIKELY(Header->ClassId)) return SizeOrUnusedBytes; diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h index c015d1ca56696..a76eb6bbc1645 100644 --- a/compiler-rt/lib/scudo/standalone/common.h +++ b/compiler-rt/lib/scudo/standalone/common.h @@ -19,22 +19,22 @@ namespace scudo { -template INLINE Dest bit_cast(const Source &S) { - COMPILER_CHECK(sizeof(Dest) == sizeof(Source)); +template inline Dest bit_cast(const Source &S) { + static_assert(sizeof(Dest) == sizeof(Source), ""); Dest D; memcpy(&D, &S, sizeof(D)); return D; } -INLINE constexpr uptr roundUpTo(uptr X, uptr Boundary) { +inline constexpr uptr roundUpTo(uptr X, uptr Boundary) { return (X + Boundary - 1) & ~(Boundary - 1); } -INLINE constexpr uptr roundDownTo(uptr X, uptr Boundary) { +inline constexpr uptr roundDownTo(uptr X, uptr Boundary) { return X & ~(Boundary - 1); } -INLINE constexpr bool isAligned(uptr X, uptr Alignment) { +inline constexpr bool isAligned(uptr X, uptr Alignment) { return (X & (Alignment - 1)) == 0; } @@ -48,14 +48,14 @@ template void Swap(T &A, T &B) { B = Tmp; } -INLINE bool isPowerOfTwo(uptr X) { return (X & (X - 1)) == 0; } +inline bool isPowerOfTwo(uptr X) { return (X & (X - 1)) == 0; } -INLINE uptr getMostSignificantSetBitIndex(uptr X) { +inline uptr getMostSignificantSetBitIndex(uptr X) { DCHECK_NE(X, 0U); return SCUDO_WORDSIZE - 1U - static_cast(__builtin_clzl(X)); } -INLINE uptr roundUpToPowerOfTwo(uptr Size) { +inline uptr roundUpToPowerOfTwo(uptr Size) { DCHECK(Size); if (isPowerOfTwo(Size)) return Size; @@ -65,17 +65,17 @@ INLINE uptr roundUpToPowerOfTwo(uptr Size) { return 1UL << (Up + 1); } -INLINE uptr getLeastSignificantSetBitIndex(uptr X) { +inline uptr getLeastSignificantSetBitIndex(uptr X) { DCHECK_NE(X, 0U); return static_cast(__builtin_ctzl(X)); } -INLINE uptr getLog2(uptr X) { +inline uptr getLog2(uptr X) { DCHECK(isPowerOfTwo(X)); return getLeastSignificantSetBitIndex(X); } -INLINE u32 getRandomU32(u32 *State) { +inline u32 getRandomU32(u32 *State) { // ANSI C linear congruential PRNG (16-bit output). // return (*State = *State * 1103515245 + 12345) >> 16; // XorShift (32-bit output). @@ -85,11 +85,11 @@ INLINE u32 getRandomU32(u32 *State) { return *State; } -INLINE u32 getRandomModN(u32 *State, u32 N) { +inline u32 getRandomModN(u32 *State, u32 N) { return getRandomU32(State) % N; // [0, N) } -template INLINE void shuffle(T *A, u32 N, u32 *RandState) { +template inline void shuffle(T *A, u32 N, u32 *RandState) { if (N <= 1) return; u32 State = *RandState; @@ -100,7 +100,7 @@ template INLINE void shuffle(T *A, u32 N, u32 *RandState) { // Hardware specific inlinable functions. -INLINE void yieldProcessor(u8 Count) { +inline void yieldProcessor(u8 Count) { #if defined(__i386__) || defined(__x86_64__) __asm__ __volatile__("" ::: "memory"); for (u8 I = 0; I < Count; I++) @@ -117,7 +117,7 @@ INLINE void yieldProcessor(u8 Count) { extern uptr PageSizeCached; uptr getPageSizeSlow(); -INLINE uptr getPageSizeCached() { +inline uptr getPageSizeCached() { // Bionic uses a hardcoded value. 
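 // (Always 4096 bytes there, which spares us the syscall on Android.)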
if (SCUDO_ANDROID) return 4096U; diff --git a/compiler-rt/lib/scudo/standalone/flags_parser.cpp b/compiler-rt/lib/scudo/standalone/flags_parser.cpp index 070c08b019384..be39fcd4f8879 100644 --- a/compiler-rt/lib/scudo/standalone/flags_parser.cpp +++ b/compiler-rt/lib/scudo/standalone/flags_parser.cpp @@ -108,7 +108,7 @@ void FlagParser::parseString(const char *S) { Pos = OldPos; } -INLINE bool parseBool(const char *Value, bool *b) { +inline bool parseBool(const char *Value, bool *b) { if (strncmp(Value, "0", 1) == 0 || strncmp(Value, "no", 2) == 0 || strncmp(Value, "false", 5) == 0) { *b = false; diff --git a/compiler-rt/lib/scudo/standalone/fuchsia.cpp b/compiler-rt/lib/scudo/standalone/fuchsia.cpp index 0a9483ae1dd0d..b3d72de158cf9 100644 --- a/compiler-rt/lib/scudo/standalone/fuchsia.cpp +++ b/compiler-rt/lib/scudo/standalone/fuchsia.cpp @@ -29,7 +29,7 @@ void NORETURN die() { __builtin_trap(); } // We zero-initialize the Extra parameter of map(), make sure this is consistent // with ZX_HANDLE_INVALID. -COMPILER_CHECK(ZX_HANDLE_INVALID == 0); +static_assert(ZX_HANDLE_INVALID == 0, ""); static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { // Only scenario so far. @@ -171,7 +171,7 @@ u64 getMonotonicTime() { return _zx_clock_get_monotonic(); } u32 getNumberOfCPUs() { return _zx_system_get_num_cpus(); } bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) { - COMPILER_CHECK(MaxRandomLength <= ZX_CPRNG_DRAW_MAX_LEN); + static_assert(MaxRandomLength <= ZX_CPRNG_DRAW_MAX_LEN, ""); if (UNLIKELY(!Buffer || !Length || Length > MaxRandomLength)) return false; _zx_cprng_draw(Buffer, Length); diff --git a/compiler-rt/lib/scudo/standalone/internal_defs.h b/compiler-rt/lib/scudo/standalone/internal_defs.h index f80c0f621a462..8f6a89ecba737 100644 --- a/compiler-rt/lib/scudo/standalone/internal_defs.h +++ b/compiler-rt/lib/scudo/standalone/internal_defs.h @@ -30,7 +30,6 @@ #define INTERFACE __attribute__((visibility("default"))) #define WEAK __attribute__((weak)) -#define INLINE inline #define ALWAYS_INLINE inline __attribute__((always_inline)) #define ALIAS(X) __attribute__((alias(X))) // Please only use the ALIGNED macro before the type. Using ALIGNED after the @@ -126,8 +125,6 @@ void NORETURN reportCheckFailed(const char *File, int Line, die(); \ } while (0) -#define COMPILER_CHECK(Pred) static_assert(Pred, "") - } // namespace scudo #endif // SCUDO_INTERNAL_DEFS_H_ diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index a0d8560c3f6c9..945324914d30f 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -42,7 +42,7 @@ template class SizeClassAllocator32 { public: typedef SizeClassMapT SizeClassMap; // Regions should be large enough to hold the largest Block. 
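 // (For instance, a RegionSizeLog of 20 gives 1 MB regions, which must be at
 // least SizeClassMap::MaxSize.)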
- COMPILER_CHECK((1UL << RegionSizeLog) >= SizeClassMap::MaxSize); + static_assert((1UL << RegionSizeLog) >= SizeClassMap::MaxSize, ""); typedef SizeClassAllocator32 ThisT; typedef SizeClassAllocatorLocalCache CacheT; typedef typename CacheT::TransferBatch TransferBatch; @@ -204,7 +204,7 @@ template class SizeClassAllocator32 { uptr AllocatedUser; ReleaseToOsInfo ReleaseInfo; }; - COMPILER_CHECK(sizeof(SizeClassInfo) % SCUDO_CACHE_LINE_SIZE == 0); + static_assert(sizeof(SizeClassInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); uptr computeRegionId(uptr Mem) { const uptr Id = Mem >> RegionSizeLog; diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index 559742d05ad9e..b208ff69bb055 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -215,7 +215,7 @@ template class SizeClassAllocator64 { MapPlatformData Data; ReleaseToOsInfo ReleaseInfo; }; - COMPILER_CHECK(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0); + static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); uptr PrimaryBase; RegionInfo *RegionInfoArray; diff --git a/compiler-rt/lib/scudo/standalone/quarantine.h b/compiler-rt/lib/scudo/standalone/quarantine.h index 4b3f368ad9659..2bf7e804ef359 100644 --- a/compiler-rt/lib/scudo/standalone/quarantine.h +++ b/compiler-rt/lib/scudo/standalone/quarantine.h @@ -59,7 +59,7 @@ struct QuarantineBatch { void shuffle(u32 State) { ::scudo::shuffle(Batch, Count, &State); } }; -COMPILER_CHECK(sizeof(QuarantineBatch) <= (1U << 13)); // 8Kb. +static_assert(sizeof(QuarantineBatch) <= (1U << 13), ""); // 8Kb. // Per-thread cache of memory blocks. template class QuarantineCache { diff --git a/compiler-rt/lib/scudo/standalone/report.cpp b/compiler-rt/lib/scudo/standalone/report.cpp index 12d851ff019ad..80cc6eda2af92 100644 --- a/compiler-rt/lib/scudo/standalone/report.cpp +++ b/compiler-rt/lib/scudo/standalone/report.cpp @@ -34,7 +34,7 @@ class ScopedErrorReport { ScopedString Message; }; -INLINE void NORETURN trap() { __builtin_trap(); } +inline void NORETURN trap() { __builtin_trap(); } // This could potentially be called recursively if a CHECK fails in the reports. void NORETURN reportCheckFailed(const char *File, int Line, diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index f288fc7d7592b..ab68e5a1d38d7 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -50,6 +50,10 @@ static Header *getHeader(const void *Ptr) { template class MapAllocator { public: + // Ensure the freelist is disabled on Fuchsia, since it doesn't support + // releasing Secondary blocks yet. 
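+ // (A MaxFreeListSize of 0 means deallocated blocks are unmapped immediately
+ // instead of being cached for reuse.)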
+ static_assert(!SCUDO_FUCHSIA || MaxFreeListSize == 0U, ""); + void initLinkerInitialized(GlobalStats *S) { Stats.initLinkerInitialized(); if (LIKELY(S)) @@ -205,10 +209,11 @@ void *MapAllocator::allocate(uptr Size, uptr AlignmentHint, template void MapAllocator::deallocate(void *Ptr) { LargeBlock::Header *H = LargeBlock::getHeader(Ptr); + const uptr Block = reinterpret_cast(H); { ScopedLock L(Mutex); InUseBlocks.remove(H); - const uptr CommitSize = H->BlockEnd - reinterpret_cast(H); + const uptr CommitSize = H->BlockEnd - Block; FreedBytes += CommitSize; NumberOfFrees++; Stats.sub(StatAllocated, CommitSize); @@ -225,11 +230,10 @@ void MapAllocator::deallocate(void *Ptr) { if (!Inserted) FreeBlocks.push_back(H); const uptr RoundedAllocationStart = - roundUpTo(reinterpret_cast(H) + LargeBlock::getHeaderSize(), - getPageSizeCached()); + roundUpTo(Block + LargeBlock::getHeaderSize(), getPageSizeCached()); MapPlatformData Data = H->Data; // TODO(kostyak): use release_to_os_interval_ms - releasePagesToOS(H->MapBase, RoundedAllocationStart - H->MapBase, + releasePagesToOS(Block, RoundedAllocationStart - Block, H->BlockEnd - RoundedAllocationStart, &Data); return; } diff --git a/compiler-rt/lib/scudo/standalone/size_class_map.h b/compiler-rt/lib/scudo/standalone/size_class_map.h index 59d6ede57ed27..947526e8aea17 100644 --- a/compiler-rt/lib/scudo/standalone/size_class_map.h +++ b/compiler-rt/lib/scudo/standalone/size_class_map.h @@ -49,7 +49,7 @@ class SizeClassMap { static const uptr MaxSize = 1UL << MaxSizeLog; static const uptr NumClasses = MidClass + ((MaxSizeLog - MidSizeLog) << S) + 1; - COMPILER_CHECK(NumClasses <= 256); + static_assert(NumClasses <= 256, ""); static const uptr LargestClassId = NumClasses - 1; static const uptr BatchClassId = 0; diff --git a/compiler-rt/lib/scudo/standalone/tests/atomic_test.cpp b/compiler-rt/lib/scudo/standalone/tests/atomic_test.cpp index 7e6f1d21f6e9c..103cd24624ba5 100644 --- a/compiler-rt/lib/scudo/standalone/tests/atomic_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/atomic_test.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/atomic_helpers.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "atomic_helpers.h" namespace scudo { diff --git a/compiler-rt/lib/scudo/standalone/tests/bytemap_test.cpp b/compiler-rt/lib/scudo/standalone/tests/bytemap_test.cpp index df0646bcd99d0..7db7feb6accdc 100644 --- a/compiler-rt/lib/scudo/standalone/tests/bytemap_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/bytemap_test.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "bytemap.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "bytemap.h" +#include #include template void testMap(T &Map, scudo::uptr Size) { diff --git a/compiler-rt/lib/scudo/standalone/tests/checksum_test.cpp b/compiler-rt/lib/scudo/standalone/tests/checksum_test.cpp index 43bbd47a3c35a..361d33c7e4641 100644 --- a/compiler-rt/lib/scudo/standalone/tests/checksum_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/checksum_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "checksum.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "checksum.h" #include diff --git a/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp b/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp index 
57e128ec82666..13da70eff85b8 100644 --- a/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "chunk.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "chunk.h" #include diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp index 9205467998ed1..fec5f864aeb7d 100644 --- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp @@ -6,14 +6,15 @@ // //===----------------------------------------------------------------------===// +#include "tests/scudo_unit_test.h" + #include "allocator_config.h" #include "combined.h" -#include "gtest/gtest.h" - #include #include #include +#include static std::mutex Mutex; static std::condition_variable Cv; @@ -21,17 +22,6 @@ static bool Ready = false; static constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc; -// This allows us to turn on the Quarantine for specific tests. The Quarantine -// parameters are on the low end, to avoid having to loop excessively in some -// tests. -static bool UseQuarantine = false; -extern "C" const char *__scudo_default_options() { - if (!UseQuarantine) - return ""; - return "quarantine_size_kb=256:thread_local_quarantine_size_kb=128:" - "quarantine_max_chunk_size=1024"; -} - template static void testAllocator() { using AllocatorT = scudo::Allocator; auto Deleter = [](AllocatorT *A) { @@ -42,6 +32,12 @@ template static void testAllocator() { Deleter); Allocator->reset(); + EXPECT_FALSE(Allocator->isOwned(&Mutex)); + EXPECT_FALSE(Allocator->isOwned(&Allocator)); + scudo::u64 StackVariable = 0x42424242U; + EXPECT_FALSE(Allocator->isOwned(&StackVariable)); + EXPECT_EQ(StackVariable, 0x42424242U); + constexpr scudo::uptr MinAlignLog = FIRST_32_SECOND_64(3U, 4U); // This allocates and deallocates a bunch of chunks, with a wide range of @@ -56,6 +52,7 @@ template static void testAllocator() { const scudo::uptr Size = (1U << SizeLog) + Delta; void *P = Allocator->allocate(Size, Origin, Align); EXPECT_NE(P, nullptr); + EXPECT_TRUE(Allocator->isOwned(P)); EXPECT_TRUE(scudo::isAligned(reinterpret_cast(P), Align)); EXPECT_LE(Size, Allocator->getUsableSize(P)); memset(P, 0xaa, Size); @@ -121,7 +118,7 @@ template static void testAllocator() { const scudo::uptr NewSize = DataSize + Delta; void *NewP = Allocator->reallocate(P, NewSize); EXPECT_EQ(NewP, P); - for (scudo::uptr I = 0; I < scudo::Min(DataSize, NewSize); I++) + for (scudo::uptr I = 0; I < DataSize - 32; I++) EXPECT_EQ((reinterpret_cast(NewP))[I], Marker); } Allocator->deallocate(P, Origin); @@ -168,15 +165,15 @@ template static void testAllocator() { } TEST(ScudoCombinedTest, BasicCombined) { - testAllocator(); -#if SCUDO_WORDSIZE == 64U + UseQuarantine = false; + testAllocator(); +#if SCUDO_FUCHSIA testAllocator(); -#endif - // The following configs should work on all platforms. 
+#else + testAllocator(); UseQuarantine = true; testAllocator(); - UseQuarantine = false; - testAllocator(); +#endif } template static void stressAllocator(AllocatorT *A) { @@ -223,20 +220,21 @@ template static void testAllocatorThreaded() { } TEST(ScudoCombinedTest, ThreadedCombined) { - testAllocatorThreaded(); -#if SCUDO_WORDSIZE == 64U + UseQuarantine = false; + testAllocatorThreaded(); +#if SCUDO_FUCHSIA testAllocatorThreaded(); -#endif +#else + testAllocatorThreaded(); UseQuarantine = true; testAllocatorThreaded(); - UseQuarantine = false; - testAllocatorThreaded(); +#endif } struct DeathConfig { // Tiny allocator, its Primary only serves chunks of 1024 bytes. using DeathSizeClassMap = scudo::SizeClassMap<1U, 10U, 10U, 10U, 1U, 10U>; - typedef scudo::SizeClassAllocator32 Primary; + typedef scudo::SizeClassAllocator64 Primary; typedef scudo::MapAllocator<0U> Secondary; template using TSDRegistryT = scudo::TSDRegistrySharedT; }; @@ -258,8 +256,8 @@ TEST(ScudoCombinedTest, DeathCombined) { // Invalid sized deallocation. EXPECT_DEATH(Allocator->deallocate(P, Origin, Size + 8U), ""); - // Misaligned pointer. - void *MisalignedP = + // Misaligned pointer. Potentially unused if EXPECT_DEATH isn't available. + UNUSED void *MisalignedP = reinterpret_cast(reinterpret_cast(P) | 1U); EXPECT_DEATH(Allocator->deallocate(MisalignedP, Origin, Size), ""); EXPECT_DEATH(Allocator->reallocate(MisalignedP, Size * 2U), ""); diff --git a/compiler-rt/lib/scudo/standalone/tests/flags_test.cpp b/compiler-rt/lib/scudo/standalone/tests/flags_test.cpp index 1c07bf13181c2..45918ad4d2ca0 100644 --- a/compiler-rt/lib/scudo/standalone/tests/flags_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/flags_test.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// +#include "tests/scudo_unit_test.h" + #include "flags.h" #include "flags_parser.h" -#include "gtest/gtest.h" - #include static const char FlagName[] = "flag_name"; diff --git a/compiler-rt/lib/scudo/standalone/tests/list_test.cpp b/compiler-rt/lib/scudo/standalone/tests/list_test.cpp index 0a0c050c98cd5..8e139916d0588 100644 --- a/compiler-rt/lib/scudo/standalone/tests/list_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/list_test.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/list.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "list.h" struct ListItem { ListItem *Next; diff --git a/compiler-rt/lib/scudo/standalone/tests/map_test.cpp b/compiler-rt/lib/scudo/standalone/tests/map_test.cpp index ab5dd8ca5fd6a..7c40b73ff2544 100644 --- a/compiler-rt/lib/scudo/standalone/tests/map_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/map_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "common.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "common.h" #include #include @@ -31,11 +31,10 @@ TEST(ScudoMapTest, MapNoAccessUnmap) { TEST(ScudoMapTest, MapUnmap) { const scudo::uptr Size = 4 * scudo::getPageSizeCached(); - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size, MappingName, 0, &Data); + void *P = scudo::map(nullptr, Size, MappingName, 0, nullptr); EXPECT_NE(P, nullptr); memset(P, 0xaa, Size); - scudo::unmap(P, Size, 0, &Data); + scudo::unmap(P, Size, 0, nullptr); EXPECT_DEATH(memset(P, 0xbb, Size), ""); } diff --git a/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp 
b/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp index c75ef8edb3666..ce715a19332f4 100644 --- a/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "mutex.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "mutex.h" +#include #include class TestData { diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp index 7da7b25ca67ed..64b625e79bf2d 100644 --- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp @@ -6,15 +6,16 @@ // //===----------------------------------------------------------------------===// +#include "tests/scudo_unit_test.h" + #include "primary32.h" #include "primary64.h" #include "size_class_map.h" -#include "gtest/gtest.h" - #include #include #include +#include // Note that with small enough regions, the SizeClassAllocator64 also works on // 32-bit architectures. It's not something we want to encourage, but we still @@ -53,7 +54,9 @@ template static void testPrimary() { TEST(ScudoPrimaryTest, BasicPrimary) { using SizeClassMap = scudo::DefaultSizeClassMap; +#if !SCUDO_FUCHSIA testPrimary>(); +#endif testPrimary>(); } @@ -78,7 +81,7 @@ TEST(ScudoPrimaryTest, Primary64OOM) { AllocationFailed = true; break; } - for (scudo::uptr J = 0; J < B->getCount(); J++) + for (scudo::u32 J = 0; J < B->getCount(); J++) memset(B->get(J), 'B', Size); Batches.push_back(B); } @@ -136,7 +139,9 @@ template static void testIteratePrimary() { TEST(ScudoPrimaryTest, PrimaryIterate) { using SizeClassMap = scudo::DefaultSizeClassMap; +#if !SCUDO_FUCHSIA testIteratePrimary>(); +#endif testIteratePrimary>(); } @@ -193,7 +198,9 @@ template static void testPrimaryThreaded() { TEST(ScudoPrimaryTest, PrimaryThreaded) { using SizeClassMap = scudo::SvelteSizeClassMap; +#if !SCUDO_FUCHSIA testPrimaryThreaded>(); +#endif testPrimaryThreaded>(); } @@ -221,6 +228,8 @@ template static void testReleaseToOS() { TEST(ScudoPrimaryTest, ReleaseToOS) { using SizeClassMap = scudo::DefaultSizeClassMap; +#if !SCUDO_FUCHSIA testReleaseToOS>(); +#endif testReleaseToOS>(); } diff --git a/compiler-rt/lib/scudo/standalone/tests/quarantine_test.cpp b/compiler-rt/lib/scudo/standalone/tests/quarantine_test.cpp index 28baf8feb653f..0422c2ff3736b 100644 --- a/compiler-rt/lib/scudo/standalone/tests/quarantine_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/quarantine_test.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "quarantine.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "quarantine.h" +#include #include static void *FakePtr = reinterpret_cast(0xFA83FA83); diff --git a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp index 3776768e9a848..22d73d09d53d7 100644 --- a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp @@ -6,16 +6,17 @@ // //===----------------------------------------------------------------------===// +#include "tests/scudo_unit_test.h" + #include "list.h" #include "release.h" #include "size_class_map.h" -#include "gtest/gtest.h" - #include #include #include +#include TEST(ScudoReleaseTest, PackedCounterArray) { for (scudo::uptr I = 0; I < 
SCUDO_WORDSIZE; I++) { diff --git a/compiler-rt/lib/scudo/standalone/tests/report_test.cpp b/compiler-rt/lib/scudo/standalone/tests/report_test.cpp index c2f377d968491..09f03f1ac896d 100644 --- a/compiler-rt/lib/scudo/standalone/tests/report_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/report_test.cpp @@ -6,11 +6,13 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/report.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "report.h" TEST(ScudoReportTest, Generic) { - void *P = reinterpret_cast(0x42424242U); + // Potentially unused if EXPECT_DEATH isn't defined. + UNUSED void *P = reinterpret_cast(0x42424242U); EXPECT_DEATH(scudo::reportError("TEST123"), "Scudo ERROR.*TEST123"); EXPECT_DEATH(scudo::reportInvalidFlag("ABC", "DEF"), "Scudo ERROR.*ABC.*DEF"); EXPECT_DEATH(scudo::reportHeaderCorruption(P), "Scudo ERROR.*42424242"); diff --git a/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test.h b/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test.h new file mode 100644 index 0000000000000..55d039ef77c37 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test.h @@ -0,0 +1,29 @@ +//===-- scudo_unit_test.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "platform.h" + +#if SCUDO_FUCHSIA +#include +#else +#include "gtest/gtest.h" +#endif + +// If EXPECT_DEATH isn't defined, make it a no-op. +#ifndef EXPECT_DEATH +#define EXPECT_DEATH(X, Y) \ + do { \ + } while (0) +#endif + +// If EXPECT_STREQ isn't defined, define our own simple one. +#ifndef EXPECT_STREQ +#define EXPECT_STREQ(X, Y) EXPECT_EQ(strcmp(X, Y), 0) +#endif + +extern bool UseQuarantine; diff --git a/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test_main.cpp b/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test_main.cpp index 60bd5648eef71..e771924354edf 100644 --- a/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test_main.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test_main.cpp @@ -6,9 +6,25 @@ // //===----------------------------------------------------------------------===// -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +// This allows us to turn on/off a Quarantine for specific tests. The Quarantine +// parameters are on the low end, to avoid having to loop excessively in some +// tests. 
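+// Scudo reads __scudo_default_options() when an allocator is initialized, so
+// tests must flip UseQuarantine before creating their allocator instance.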
+bool UseQuarantine = true; +extern "C" __attribute__((visibility("default"))) const char * +__scudo_default_options() { + if (!UseQuarantine) + return "dealloc_type_mismatch=true"; + return "quarantine_size_kb=256:thread_local_quarantine_size_kb=128:" + "quarantine_max_chunk_size=512:dealloc_type_mismatch=true"; +} int main(int argc, char **argv) { +#if !SCUDO_FUCHSIA testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); +#else + return RUN_ALL_TESTS(argc, argv); +#endif } diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index 047a61653cb2b..1e7dcec5861fe 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "secondary.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "secondary.h" #include @@ -16,6 +16,7 @@ #include #include #include +#include template static void testSecondaryBasic(void) { scudo::GlobalStats S; @@ -54,12 +55,18 @@ template static void testSecondaryBasic(void) { } TEST(ScudoSecondaryTest, SecondaryBasic) { - testSecondaryBasic>(); testSecondaryBasic>(); +#if !SCUDO_FUCHSIA + testSecondaryBasic>(); testSecondaryBasic>(); +#endif } +#if SCUDO_FUCHSIA +using LargeAllocator = scudo::MapAllocator<0U>; +#else using LargeAllocator = scudo::MapAllocator<>; +#endif // This exercises a variety of combinations of size and alignment for the // MapAllocator. The size computation done here mimic the ones done by the diff --git a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp index 39babc14902e4..55850400a7650 100644 --- a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/size_class_map.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "size_class_map.h" template void testSizeClassMap() { typedef SizeClassMap SCMap; diff --git a/compiler-rt/lib/scudo/standalone/tests/stats_test.cpp b/compiler-rt/lib/scudo/standalone/tests/stats_test.cpp index 449c1491d5558..cdadfbad3cbc2 100644 --- a/compiler-rt/lib/scudo/standalone/tests/stats_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/stats_test.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/stats.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "stats.h" TEST(ScudoStatsTest, LocalStats) { scudo::LocalStats LStats; diff --git a/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp b/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp index 3b1a5e8743e60..eed174dc586a4 100644 --- a/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/string_utils.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "string_utils.h" #include diff --git a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp index 1941723d5d04f..b32c62fe6ca16 100644 --- 
a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// +#include "tests/scudo_unit_test.h" + #include "tsd_exclusive.h" #include "tsd_shared.h" -#include "gtest/gtest.h" - #include #include #include @@ -108,7 +108,9 @@ template static void testRegistry() { TEST(ScudoTSDTest, TSDRegistryBasic) { testRegistry>(); testRegistry>(); +#if !SCUDO_FUCHSIA testRegistry>(); +#endif } static std::mutex Mutex; @@ -164,5 +166,7 @@ template static void testRegistryThreaded() { TEST(ScudoTSDTest, TSDRegistryThreaded) { testRegistryThreaded>(); testRegistryThreaded>(); +#if !SCUDO_FUCHSIA testRegistryThreaded>(); +#endif } diff --git a/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp b/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp index 946a44eee8e50..d2c6a9b6bb3cc 100644 --- a/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "vector.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "vector.h" TEST(ScudoVectorTest, Basic) { scudo::Vector V; diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp index cb651f265f027..99e7aa2fa21cd 100644 --- a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp @@ -6,10 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "platform.h" - -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" +#include #include #include #include @@ -32,11 +31,6 @@ int malloc_iterate(uintptr_t base, size_t size, // We have to use a small quarantine to make sure that our double-free tests // trigger. Otherwise EXPECT_DEATH ends up reallocating the chunk that was just // freed (this depends on the size obviously) and the following free succeeds. 
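 // (quarantine_max_chunk_size caps which sizes are quarantined: the small
 // test allocations fall under it, while larger ones bypass the quarantine.)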
-extern "C" __attribute__((visibility("default"))) const char * -__scudo_default_options() { - return "quarantine_size_kb=256:thread_local_quarantine_size_kb=128:" - "quarantine_max_chunk_size=512"; -} static const size_t Size = 100U; @@ -200,6 +194,7 @@ TEST(ScudoWrappersCTest, Realloc) { #define M_PURGE -101 #endif +#if !SCUDO_FUCHSIA TEST(ScudoWrappersCTest, MallOpt) { errno = 0; EXPECT_EQ(mallopt(-1000, 1), 0); @@ -213,8 +208,10 @@ TEST(ScudoWrappersCTest, MallOpt) { EXPECT_EQ(mallopt(M_DECAY_TIME, 1), 1); EXPECT_EQ(mallopt(M_DECAY_TIME, 0), 1); } +#endif TEST(ScudoWrappersCTest, OtherAlloc) { +#if !SCUDO_FUCHSIA const size_t PageSize = sysconf(_SC_PAGESIZE); void *P = pvalloc(Size); @@ -229,10 +226,12 @@ TEST(ScudoWrappersCTest, OtherAlloc) { EXPECT_NE(P, nullptr); EXPECT_EQ(reinterpret_cast(P) & (PageSize - 1), 0U); free(P); +#endif EXPECT_EQ(valloc(SIZE_MAX), nullptr); } +#if !SCUDO_FUCHSIA TEST(ScudoWrappersCTest, MallInfo) { const size_t BypassQuarantineSize = 1024U; @@ -248,6 +247,7 @@ TEST(ScudoWrappersCTest, MallInfo) { MI = mallinfo(); EXPECT_GE(static_cast(MI.fordblks), Free + BypassQuarantineSize); } +#endif static uintptr_t BoundaryP; static size_t Count; @@ -282,6 +282,7 @@ TEST(ScudoWrappersCTest, MallocIterateBoundary) { free(P); } +#if !SCUDO_FUCHSIA TEST(ScudoWrappersCTest, MallocInfo) { char Buffer[64]; FILE *F = fmemopen(Buffer, sizeof(Buffer), "w+"); @@ -292,3 +293,4 @@ TEST(ScudoWrappersCTest, MallocInfo) { fclose(F); EXPECT_EQ(strncmp(Buffer, " #include #include +#include void operator delete(void *, size_t) noexcept; void operator delete[](void *, size_t) noexcept; @@ -18,12 +19,6 @@ void operator delete[](void *, size_t) noexcept; // Note that every Cxx allocation function in the test binary will be fulfilled // by Scudo. See the comment in the C counterpart of this file. 
-extern "C" __attribute__((visibility("default"))) const char * -__scudo_default_options() { - return "quarantine_size_kb=256:thread_local_quarantine_size_kb=128:" - "quarantine_max_chunk_size=512:dealloc_type_mismatch=true"; -} - template static void testCxxNew() { T *P = new T; EXPECT_NE(P, nullptr); diff --git a/compiler-rt/lib/scudo/standalone/tsd.h b/compiler-rt/lib/scudo/standalone/tsd.h index f24ff01960fb2..626cc4b80fb7b 100644 --- a/compiler-rt/lib/scudo/standalone/tsd.h +++ b/compiler-rt/lib/scudo/standalone/tsd.h @@ -38,7 +38,7 @@ template struct ALIGNED(SCUDO_CACHE_LINE_SIZE) TSD { void commitBack(Allocator *Instance) { Instance->commitBack(this); } - INLINE bool tryLock() { + inline bool tryLock() { if (Mutex.tryLock()) { atomic_store_relaxed(&Precedence, 0); return true; @@ -49,12 +49,12 @@ template struct ALIGNED(SCUDO_CACHE_LINE_SIZE) TSD { static_cast(getMonotonicTime() >> FIRST_32_SECOND_64(16, 0))); return false; } - INLINE void lock() { + inline void lock() { atomic_store_relaxed(&Precedence, 0); Mutex.lock(); } - INLINE void unlock() { Mutex.unlock(); } - INLINE uptr getPrecedence() { return atomic_load_relaxed(&Precedence); } + inline void unlock() { Mutex.unlock(); } + inline uptr getPrecedence() { return atomic_load_relaxed(&Precedence); } private: HybridMutex Mutex; diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h index a43cf3fc33769..5f58068edf781 100644 --- a/compiler-rt/lib/scudo/standalone/tsd_shared.h +++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h @@ -50,6 +50,7 @@ template struct TSDRegistrySharedT { void unmapTestOnly() { unmap(reinterpret_cast(TSDs), sizeof(TSD) * NumberOfTSDs); + setCurrentTSD(nullptr); } ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h b/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h index d4370d506e5ea..7fc1a9600e53b 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h +++ b/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h @@ -20,7 +20,7 @@ namespace scudo { // A common errno setting logic shared by almost all Scudo C wrappers. -INLINE void *setErrnoOnNull(void *Ptr) { +inline void *setErrnoOnNull(void *Ptr) { if (UNLIKELY(!Ptr)) errno = ENOMEM; return Ptr; @@ -30,14 +30,14 @@ INLINE void *setErrnoOnNull(void *Ptr) { // Checks aligned_alloc() parameters, verifies that the alignment is a power of // two and that the size is a multiple of alignment. -INLINE bool checkAlignedAllocAlignmentAndSize(uptr Alignment, uptr Size) { +inline bool checkAlignedAllocAlignmentAndSize(uptr Alignment, uptr Size) { return Alignment == 0 || !isPowerOfTwo(Alignment) || !isAligned(Size, Alignment); } // Checks posix_memalign() parameters, verifies that alignment is a power of two // and a multiple of sizeof(void *). -INLINE bool checkPosixMemalignAlignment(uptr Alignment) { +inline bool checkPosixMemalignAlignment(uptr Alignment) { return Alignment == 0 || !isPowerOfTwo(Alignment) || !isAligned(Alignment, sizeof(void *)); } @@ -45,7 +45,7 @@ INLINE bool checkPosixMemalignAlignment(uptr Alignment) { // Returns true if calloc(Size, N) overflows on Size*N calculation. Use a // builtin supported by recent clang & GCC if it exists, otherwise fallback to a // costly division. 
-INLINE bool checkForCallocOverflow(uptr Size, uptr N, uptr *Product) { +inline bool checkForCallocOverflow(uptr Size, uptr N, uptr *Product) { #if __has_builtin(__builtin_umull_overflow) return __builtin_umull_overflow(Size, N, Product); #else @@ -58,7 +58,7 @@ INLINE bool checkForCallocOverflow(uptr Size, uptr N, uptr *Product) { // Returns true if the size passed to pvalloc overflows when rounded to the next // multiple of PageSize. -INLINE bool checkForPvallocOverflow(uptr Size, uptr PageSize) { +inline bool checkForPvallocOverflow(uptr Size, uptr PageSize) { return roundUpTo(Size, PageSize) < Size; } diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt index 02ce6aabd6995..f0330bcfe3041 100644 --- a/compiler-rt/test/CMakeLists.txt +++ b/compiler-rt/test/CMakeLists.txt @@ -1,6 +1,7 @@ # Needed for lit support in standalone builds. include(AddLLVM) +pythonize_bool(LLVM_ENABLE_EXPENSIVE_CHECKS) configure_compiler_rt_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.common.configured.in ${CMAKE_CURRENT_BINARY_DIR}/lit.common.configured) diff --git a/compiler-rt/test/builtins/Unit/addtf3_test.c b/compiler-rt/test/builtins/Unit/addtf3_test.c index 7ca0355e42adf..dcd4efe9c9015 100644 --- a/compiler-rt/test/builtins/Unit/addtf3_test.c +++ b/compiler-rt/test/builtins/Unit/addtf3_test.c @@ -76,7 +76,8 @@ int main() UINT64_C(0x61e58dd6c51eb77c))) return 1; -#if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) +#if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) || \ + defined(i386) || defined(__x86_64__) // Rounding mode tests on supported architectures const long double m = 1234.0L, n = 0.01L; diff --git a/compiler-rt/test/builtins/Unit/subtf3_test.c b/compiler-rt/test/builtins/Unit/subtf3_test.c index b95f2ef996d61..265ab642ecf0c 100644 --- a/compiler-rt/test/builtins/Unit/subtf3_test.c +++ b/compiler-rt/test/builtins/Unit/subtf3_test.c @@ -69,7 +69,8 @@ int main() UINT64_C(0xa44a7bca780a166c))) return 1; -#if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) +#if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) || \ + defined(i386) || defined(__x86_64__) // Rounding mode tests on supported architectures const long double m = 1234.02L, n = 0.01L; diff --git a/compiler-rt/test/fuzzer/large.test b/compiler-rt/test/fuzzer/large.test index b03b60fdb6503..9aa7c46dc42c8 100644 --- a/compiler-rt/test/fuzzer/large.test +++ b/compiler-rt/test/fuzzer/large.test @@ -1,3 +1,5 @@ +UNSUPPORTED: expensive_checks + RUN: %cpp_compiler %S/LargeTest.cpp -o %t-LargeTest RUN: %run %t-LargeTest -runs=10000 diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index 2a8d93166f695..00f0a1e93abe1 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -477,6 +477,9 @@ def is_windows_lto_supported(): else: config.available_features.add("shadow-scale-3") +if config.expensive_checks: + config.available_features.add("expensive_checks") + # Propagate the LLD/LTO into the clang config option, so nothing else is needed. 
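Aside (editorial): checkForCallocOverflow above prefers a compiler builtin and only falls back to a division. The same pattern in standalone form, using the type-generic __builtin_mul_overflow rather than scudo's uptr-specific variant (an illustrative sketch, not scudo code; the lit configuration hunk resumes below):

```cpp
// Illustrative sketch of the calloc-style overflow check: builtin fast path,
// division fallback.
#include <cstddef>
#include <cstdio>

static inline bool mulOverflows(size_t Size, size_t N, size_t *Product) {
#if __has_builtin(__builtin_mul_overflow)
  // Fast path: the compiler computes the product and the overflow flag in
  // one step (typically a single multiply plus a flags check).
  return __builtin_mul_overflow(Size, N, Product);
#else
  *Product = Size * N; // unsigned multiplication wraps on overflow
  if (Size == 0)
    return false;
  // If the multiplication wrapped, dividing the product by one factor
  // cannot recover the other factor.
  return (*Product / Size) != N;
#endif
}

int main() {
  size_t Product;
  printf("%d\n", mulOverflows((size_t)-1, 2, &Product)); // 1: overflows
  printf("%d\n", mulOverflows(1000, 1000, &Product));    // 0: 1000000 fits
}
```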
run_wrapper = [] target_cflags = [getattr(config, 'target_cflags', None)] diff --git a/compiler-rt/test/lit.common.configured.in b/compiler-rt/test/lit.common.configured.in index 5ca95efd530ce..b4862f74cdd02 100644 --- a/compiler-rt/test/lit.common.configured.in +++ b/compiler-rt/test/lit.common.configured.in @@ -42,6 +42,7 @@ set_default("android_serial", "@ANDROID_SERIAL_FOR_TESTING@") set_default("android_files_to_push", []) set_default("have_rpc_xdr_h", @HAVE_RPC_XDR_H@) set_default("gwp_asan", @COMPILER_RT_HAS_GWP_ASAN_PYBOOL@) +set_default("expensive_checks", @LLVM_ENABLE_EXPENSIVE_CHECKS_PYBOOL@) config.available_features.add('target-is-%s' % config.target_arch) if config.enable_per_target_runtime_dir: diff --git a/compiler-rt/test/profile/Windows/Inputs/instrprof-multiprocess.c b/compiler-rt/test/profile/Windows/Inputs/instrprof-multiprocess.c new file mode 100644 index 0000000000000..774712d39738c --- /dev/null +++ b/compiler-rt/test/profile/Windows/Inputs/instrprof-multiprocess.c @@ -0,0 +1,89 @@ +/* This is a test case where the parent process forks 10 children + * which contend to merge profile data to the same file. With + * file locking support, the data from each child should not + * be lost. + */ +#include <stdio.h> +#include <stdlib.h> +#include <windows.h> + +void spawn_child(PROCESS_INFORMATION *pi, int child_num) { + wchar_t child_str[10]; + _itow(child_num, child_str, 10); + if (!SetEnvironmentVariableW(L"CHILD_NUM", child_str)) { + printf("SetEnvironmentVariableW failed (0x%8lx).\n", GetLastError()); + fflush(stdout); + exit(1); + } + + STARTUPINFOW si; + memset(&si, 0, sizeof(si)); + si.cb = sizeof(si); + + memset(pi, 0, sizeof(PROCESS_INFORMATION)); + + if (!CreateProcessW(NULL, // No module name (use command line) + GetCommandLineW(), // Command line + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + TRUE, // Set handle inheritance to TRUE + 0, // No flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, pi)) { + printf("CreateProcess failed (0x%08lx).\n", GetLastError()); + fflush(stdout); + exit(1); + } +} + +int wait_child(PROCESS_INFORMATION *pi) { + WaitForSingleObject(pi->hProcess, INFINITE); + + DWORD exit_code; + if (!GetExitCodeProcess(pi->hProcess, &exit_code)) { + printf("GetExitCodeProcess failed (0x%08lx).\n", GetLastError()); + fflush(stdout); + exit(1); + } + + CloseHandle(pi->hProcess); + CloseHandle(pi->hThread); + + return exit_code; +} + +#define NUM_CHILDREN 10 + +int foo(int num) { + if (num < (NUM_CHILDREN / 2)) { + return 1; + } else if (num < NUM_CHILDREN) { + return 2; + } + return 3; +} + +int main(int argc, char *argv[]) { + char *child_str = getenv("CHILD_NUM"); + if (!child_str) { + PROCESS_INFORMATION child[NUM_CHILDREN]; + // In parent + for (int i = 0; i < NUM_CHILDREN; i++) { + spawn_child(&child[i], i); + } + for (int i = 0; i < NUM_CHILDREN; i++) { + wait_child(&child[i]); + } + return 0; + } else { + // In child + int child_num = atoi(child_str); + int result = foo(child_num); + if (result == 3) { + fprintf(stderr, "Invalid child count!"); + return 1; + } + return 0; + } +} diff --git a/compiler-rt/test/profile/Windows/instrprof-multiprocess.test b/compiler-rt/test/profile/Windows/instrprof-multiprocess.test new file mode 100644 index 0000000000000..ae5ebd45bec95 --- /dev/null +++ b/compiler-rt/test/profile/Windows/instrprof-multiprocess.test @@ -0,0 +1,10 @@ +RUN: %clang_profgen %S/Inputs/instrprof-multiprocess.c -o %t +RUN: rm -f %t_*.profraw +RUN: env LLVM_PROFILE_FILE=%t_%m.profraw
%run %t +RUN: llvm-profdata show --counts -function=foo %t_*.profraw | FileCheck %s + +CHECK: Counters: +CHECK: foo: +CHECK: Function count: 10 +CHECK: Block counts: [5, 5] +CHECK: Functions shown: 1 diff --git a/compiler-rt/test/profile/Windows/lit.local.cfg.py b/compiler-rt/test/profile/Windows/lit.local.cfg.py new file mode 100644 index 0000000000000..e924d91c44934 --- /dev/null +++ b/compiler-rt/test/profile/Windows/lit.local.cfg.py @@ -0,0 +1,9 @@ +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +if root.host_os not in ['Windows']: + config.unsupported = True diff --git a/compiler-rt/test/tsan/race_range_pc.cc b/compiler-rt/test/tsan/race_range_pc.cpp similarity index 100% rename from compiler-rt/test/tsan/race_range_pc.cc rename to compiler-rt/test/tsan/race_range_pc.cpp diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c new file mode 100644 index 0000000000000..0e62c02d3affb --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c @@ -0,0 +1,122 @@ +// RUN: %clang -x c -fsanitize=implicit-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-conversion -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-conversion -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-conversion -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +void test_unsigned() { + unsigned char x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + x = 0; + --x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 254; + x++; + x = 254; + ++x; + + x = 254; + x--; + x = 254; + --x; + + x = 255; + x++; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + x = 255; + ++x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 
256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + + x = 255; + x--; + x = 255; + --x; +} + +void test_signed() { + signed char x; + + x = -128; + x++; + x = -128; + ++x; + + x = -128; + x--; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + x = -128; + --x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + + x = -1; + x++; + x = -1; + ++x; + + x = -1; + x--; + x = -1; + --x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + x = 0; + --x; + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 127; + x++; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + x = 127; + ++x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + + x = 127; + x--; + x = 127; + --x; +} + +int main() { + test_unsigned(); + test_signed(); + + return 0; +} diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c new file mode 100644 index 0000000000000..4b56a105aa289 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c @@ -0,0 +1,120 @@ +// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +void test_unsigned() { + unsigned char x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + x = 0; + --x; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime 
error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 254; + x++; + x = 254; + ++x; + + x = 254; + x--; + x = 254; + --x; + + x = 255; + x++; + x = 255; + ++x; + + x = 255; + x--; + x = 255; + --x; +} + +void test_signed() { + signed char x; + + x = -128; + x++; + x = -128; + ++x; + + x = -128; + x--; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + x = -128; + --x; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + + x = -1; + x++; + x = -1; + ++x; + + x = -1; + x--; + x = -1; + --x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + x = 0; + --x; + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 127; + x++; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + x = 127; + ++x; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + + x = 127; + x--; + x = 127; + --x; +} + +int main() { + test_unsigned(); + test_signed(); + + return 0; +} diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c new file mode 100644 index 0000000000000..4806efb24eb13 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c @@ -0,0 +1,122 @@ +// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +void test_unsigned() { + unsigned char x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + 
x--; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + x = 0; + --x; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 254; + x++; + x = 254; + ++x; + + x = 254; + x--; + x = 254; + --x; + + x = 255; + x++; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + x = 255; + ++x; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + + x = 255; + x--; + x = 255; + --x; +} + +void test_signed() { + signed char x; + + x = -128; + x++; + x = -128; + ++x; + + x = -128; + x--; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + x = -128; + --x; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + + x = -1; + x++; + x = -1; + ++x; + + x = -1; + x--; + x = -1; + --x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + x = 0; + --x; + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 127; + x++; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + x = 127; + ++x; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + + x = 127; + x--; + x = 127; + --x; +} + +int main() { + test_unsigned(); + test_signed(); + + return 0; +} diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index bfbba319d7208..fa488da0885fb 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -565,7 +565,12 @@ function(cxx_add_basic_build_flags target) endif() if (LIBCXX_HAS_COMMENT_LIB_PRAGMA) - target_compile_definitions(${target} PRIVATE -D_LIBCPP_HAS_COMMENT_LIB_PRAGMA) + if (LIBCXX_HAS_PTHREAD_LIB) + target_compile_definitions(${target} PRIVATE -D_LIBCPP_LINK_PTHREAD_LIB) + endif() + if (LIBCXX_HAS_RT_LIB) + target_compile_definitions(${target} PRIVATE -D_LIBCPP_LINK_RT_LIB) + endif() endif() endfunction() diff --git a/libcxx/include/string b/libcxx/include/string index c16dbedc51c0f..4e0b21135a7e6 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -2289,10 +2289,20 @@ basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, tr _NOEXCEPT_(is_nothrow_move_assignable::value) #endif { - __clear_and_shrink(); - __r_.first() = __str.__r_.first(); - __move_assign_alloc(__str); - __str.__zero(); + if (__is_long()) { + __alloc_traits::deallocate(__alloc(), __get_long_pointer(), + __get_long_cap()); +#if 
_LIBCPP_STD_VER <= 14 + if (!is_nothrow_move_assignable<allocator_type>::value) { + __set_short_size(0); + traits_type::assign(__get_short_pointer()[0], value_type()); + } +#endif + } + __move_assign_alloc(__str); + __r_.first() = __str.__r_.first(); + __str.__set_short_size(0); + traits_type::assign(__str.__get_short_pointer()[0], value_type()); } template <class _CharT, class _Traits, class _Allocator> diff --git a/libcxx/src/algorithm.cpp b/libcxx/src/algorithm.cpp index ffdcb5fccde7a..40669fb9e7561 100644 --- a/libcxx/src/algorithm.cpp +++ b/libcxx/src/algorithm.cpp @@ -10,7 +10,7 @@ #include "random" #ifndef _LIBCPP_HAS_NO_THREADS #include "mutex" -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCPP_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif #endif diff --git a/libcxx/src/chrono.cpp b/libcxx/src/chrono.cpp index 2d78caea61c35..9d448b6a985bc 100644 --- a/libcxx/src/chrono.cpp +++ b/libcxx/src/chrono.cpp @@ -37,7 +37,7 @@ #endif #endif -#if defined(__unix__) && defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCPP_LINK_RT_LIB) #pragma comment(lib, "rt") #endif diff --git a/libcxx/src/condition_variable.cpp b/libcxx/src/condition_variable.cpp index bf89d255dd823..d133b010d71f4 100644 --- a/libcxx/src/condition_variable.cpp +++ b/libcxx/src/condition_variable.cpp @@ -15,7 +15,7 @@ #include "system_error" #include "__undef_macros" -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCPP_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif diff --git a/libcxx/src/debug.cpp b/libcxx/src/debug.cpp index 1f5ce1052f87d..20055fcf7590e 100644 --- a/libcxx/src/debug.cpp +++ b/libcxx/src/debug.cpp @@ -15,7 +15,7 @@ #include "__hash_table" #ifndef _LIBCPP_HAS_NO_THREADS #include "mutex" -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCPP_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif #endif diff --git a/libcxx/src/experimental/memory_resource.cpp b/libcxx/src/experimental/memory_resource.cpp index e987262831736..68c5bc99cc72a 100644 --- a/libcxx/src/experimental/memory_resource.cpp +++ b/libcxx/src/experimental/memory_resource.cpp @@ -12,7 +12,7 @@ #include "atomic" #elif !defined(_LIBCPP_HAS_NO_THREADS) #include "mutex" -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCPP_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif #endif diff --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp index 08a6b2b86e26a..876399fb4d4e5 100644 --- a/libcxx/src/filesystem/operations.cpp +++ b/libcxx/src/filesystem/operations.cpp @@ -44,7 +44,7 @@ #include <sys/time.h> // for gettimeofday and timeval #endif // !defined(CLOCK_REALTIME) -#if defined(__unix__) && defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCPP_LINK_RT_LIB) #pragma comment(lib, "rt") #endif diff --git a/libcxx/src/memory.cpp b/libcxx/src/memory.cpp index e89d94f27e4cd..633c9a6f56580 100644 --- a/libcxx/src/memory.cpp +++ b/libcxx/src/memory.cpp @@ -10,7 +10,7 @@ #ifndef _LIBCPP_HAS_NO_THREADS #include "mutex" #include "thread" -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && 
defined(_LIBCPP_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif #endif diff --git a/libcxx/src/mutex.cpp b/libcxx/src/mutex.cpp index 7e979cd890413..27a4fd8927785 100644 --- a/libcxx/src/mutex.cpp +++ b/libcxx/src/mutex.cpp @@ -13,7 +13,7 @@ #include "__undef_macros" #ifndef _LIBCPP_HAS_NO_THREADS -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCPP_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif #endif diff --git a/libcxx/src/shared_mutex.cpp b/libcxx/src/shared_mutex.cpp index 9e6d5202aafaf..5feef9f4889f4 100644 --- a/libcxx/src/shared_mutex.cpp +++ b/libcxx/src/shared_mutex.cpp @@ -10,7 +10,7 @@ #ifndef _LIBCPP_HAS_NO_THREADS #include "shared_mutex" -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCPP_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif diff --git a/libcxx/src/thread.cpp b/libcxx/src/thread.cpp index 967a53735accb..c0bc1cbbbbc32 100644 --- a/libcxx/src/thread.cpp +++ b/libcxx/src/thread.cpp @@ -35,7 +35,7 @@ #include #endif -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCPP_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 07657ea6e4f8f..befe75c20e76b 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -915,6 +915,7 @@ def configure_warnings(self): self.cxx.addWarningFlagIfSupported('-Wshadow') self.cxx.addWarningFlagIfSupported('-Wno-unused-command-line-argument') self.cxx.addWarningFlagIfSupported('-Wno-attributes') + self.cxx.addWarningFlagIfSupported('-Wno-deprecated-copy') self.cxx.addWarningFlagIfSupported('-Wno-constant-evaluated') self.cxx.addWarningFlagIfSupported('-Wno-pessimizing-move') self.cxx.addWarningFlagIfSupported('-Wno-c++11-extensions') diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index d914b6e02905e..0ddcd5f971f7f 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -456,7 +456,9 @@ if (LIBCXXABI_BAREMETAL) endif() if (LIBCXXABI_HAS_COMMENT_LIB_PRAGMA) - add_definitions(-D_LIBCXXABI_HAS_COMMENT_LIB_PRAGMA) + if (LIBCXXABI_HAS_PTHREAD_LIB) + add_definitions(-D_LIBCXXABI_LINK_PTHREAD_LIB) + endif() endif() string(REPLACE ";" " " LIBCXXABI_CXX_FLAGS "${LIBCXXABI_CXX_FLAGS}") diff --git a/libcxxabi/src/cxa_exception_storage.cpp b/libcxxabi/src/cxa_exception_storage.cpp index 28c0122ff0746..24ff55e39d291 100644 --- a/libcxxabi/src/cxa_exception_storage.cpp +++ b/libcxxabi/src/cxa_exception_storage.cpp @@ -46,7 +46,7 @@ extern "C" { #include "abort_message.h" #include "fallback_malloc.h" -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCXXABI_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif diff --git a/libcxxabi/src/cxa_guard_impl.h b/libcxxabi/src/cxa_guard_impl.h index 98e42ba2fb0b0..a8ec0b72feea2 100644 --- a/libcxxabi/src/cxa_guard_impl.h +++ b/libcxxabi/src/cxa_guard_impl.h @@ -50,7 +50,7 @@ #include #include <__threading_support> #ifndef _LIBCXXABI_HAS_NO_THREADS -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCXXABI_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB) #pragma 
comment(lib, "pthread") #endif #endif diff --git a/libcxxabi/src/cxa_thread_atexit.cpp b/libcxxabi/src/cxa_thread_atexit.cpp index 923b265b27c2c..a940eaf2f9cc3 100644 --- a/libcxxabi/src/cxa_thread_atexit.cpp +++ b/libcxxabi/src/cxa_thread_atexit.cpp @@ -10,7 +10,7 @@ #include "cxxabi.h" #include <__threading_support> #ifndef _LIBCXXABI_HAS_NO_THREADS -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCXXABI_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif #endif diff --git a/libcxxabi/src/fallback_malloc.cpp b/libcxxabi/src/fallback_malloc.cpp index 8f301bcacd14c..fdae40764abef 100644 --- a/libcxxabi/src/fallback_malloc.cpp +++ b/libcxxabi/src/fallback_malloc.cpp @@ -13,7 +13,7 @@ #include <__threading_support> #ifndef _LIBCXXABI_HAS_NO_THREADS -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBCXXABI_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif #endif diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index 25dc95cf6ba76..08095d1333a56 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -352,7 +352,12 @@ if (WIN32 AND LIBUNWIND_ENABLE_STATIC AND NOT LIBUNWIND_ENABLE_SHARED) endif() if (LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) - add_definitions(-D_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) + if (LIBUNWIND_HAS_DL_LIB) + add_definitions(-D_LIBUNWIND_LINK_DL_LIB) + endif() + if (LIBUNWIND_HAS_PTHREAD_LIB) + add_definitions(-D_LIBUNWIND_LINK_PTHREAD_LIB) + endif() endif() #=============================================================================== diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp index db67df4dc80ac..7433476f91172 100644 --- a/libunwind/src/AddressSpace.hpp +++ b/libunwind/src/AddressSpace.hpp @@ -27,7 +27,7 @@ #if _LIBUNWIND_USE_DLADDR #include -#if defined(__unix__) && defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBUNWIND_LINK_DL_LIB) #pragma comment(lib, "dl") #endif #endif diff --git a/libunwind/src/RWMutex.hpp b/libunwind/src/RWMutex.hpp index 954e94c322d45..fcd3f4967d17f 100644 --- a/libunwind/src/RWMutex.hpp +++ b/libunwind/src/RWMutex.hpp @@ -17,7 +17,7 @@ #include #elif !defined(_LIBUNWIND_HAS_NO_THREADS) #include -#if defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBUNWIND_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif #endif diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 5cf07029fa1d5..4e80e3d78f167 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -40,7 +40,8 @@ class AArch64 : public TargetInfo { void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, int32_t index, unsigned relOff) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; bool usesOnlyLowPageBits(RelType type) const override; @@ -230,13 +231,14 @@ void AArch64::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, } bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + 
uint64_t branchAddr, const Symbol &s, + int64_t a) const { // ELF for the ARM 64-bit architecture, section Call and Jump relocations // only permits range extension thunks for R_AARCH64_CALL26 and // R_AARCH64_JUMP26 relocation types. if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) return false; - uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA(); + uint64_t dst = expr == R_PLT_PC ? s.getPltVA() : s.getVA(a); return !inBranchRange(type, branchAddr, dst); } diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index 41baea496d369..0f522d324ff74 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -39,7 +39,8 @@ class ARM final : public TargetInfo { void addPltSymbols(InputSection &isec, uint64_t off) const override; void addPltHeaderSymbols(InputSection &isd) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; @@ -262,7 +263,7 @@ void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { } bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { // If S is an undefined weak symbol and does not have a PLT entry then it // will be resolved as a branch to the next instruction. if (s.isUndefWeak() && !s.isInPlt()) diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index 74c0b59ecd5b9..317b22ec264c8 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -35,7 +35,8 @@ template class MIPS final : public TargetInfo { void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, int32_t index, unsigned relOff) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; bool usesOnlyLowPageBits(RelType type) const override; }; @@ -356,7 +357,8 @@ void MIPS::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, template bool MIPS::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, + int64_t /*a*/) const { // Any MIPS PIC code function is invoked with its address in register $t9. 
// So if we have a branch instruction from non-PIC code to the PIC one // we cannot make the jump directly and need to create a small stubs diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index c4eecb9a29c22..b0d93c6ce9b5c 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -37,7 +37,8 @@ class PPC final : public TargetInfo { } void writeGotPlt(uint8_t *buf, const Symbol &s) const override; bool needsThunk(RelExpr expr, RelType relocType, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; @@ -169,7 +170,7 @@ void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const { } bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { if (type != R_PPC_REL24 && type != R_PPC_PLTREL24) return false; if (s.isInPlt()) diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 6299fd8a52436..ed16974af8679 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -205,7 +205,8 @@ class PPC64 final : public TargetInfo { void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; void writeGotHeader(uint8_t *buf) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, @@ -898,7 +899,7 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { if (type != R_PPC64_REL14 && type != R_PPC64_REL24) return false; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index b13bb5e00def3..a0987259d24ba 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1408,13 +1408,13 @@ static void handleUndefinedGlob(StringRef arg) { } std::vector syms; - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { // Calling Sym->fetch() from here is not safe because it may // add new symbols to the symbol table, invalidating the // current iterator. So we just keep a note. if (pat->match(sym->getName())) syms.push_back(sym); - }); + } for (Symbol *sym : syms) handleUndefined(sym); @@ -1440,10 +1440,10 @@ static void handleLibcall(StringRef name) { // result, the passes after the symbol resolution won't see any // symbols of type CommonSymbol. 
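Aside (editorial): the Driver.cpp hunk above is careful to "just keep a note" because Symbol::fetch() can add new symbols and invalidate the iterator of the container being walked. A dependency-free sketch of that snapshot-then-mutate pattern, all names illustrative (the diff resumes below):

```cpp
// Illustrative model of handleUndefinedGlob: fetch() may append to the
// table, so matches are snapshotted first and only then processed.
#include <cassert>
#include <string>
#include <vector>

static std::vector<std::string> table = {"foo", "foobar", "baz"};

// Stand-in for Symbol::fetch(): grows `table`, so it must never be called
// while a range-for over `table` is live.
static void fetch(const std::string &name) { table.push_back(name + "$lazy"); }

static void handleGlob(const std::string &pat) {
  std::vector<std::string> matches; // 1) keep a note of the matches
  for (const std::string &name : table)
    if (name.find(pat) != std::string::npos)
      matches.push_back(name);
  for (const std::string &name : matches) // 2) mutate the table safely
    fetch(name);
}

int main() {
  handleGlob("foo"); // matches "foo" and "foobar"
  assert(table.size() == 5);
  return 0;
}
```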
static void replaceCommonSymbols() { - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { auto *s = dyn_cast<CommonSymbol>(sym); if (!s) - return; + continue; auto *bss = make<BssSection>("COMMON", s->size, s->alignment); bss->file = s->file; @@ -1451,7 +1451,7 @@ static void replaceCommonSymbols() { inputSections.push_back(bss); s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type, /*value=*/0, s->size, bss}); - }); + } } // If all references to a DSO happen to be weak, the DSO is not added @@ -1459,15 +1459,15 @@ static void replaceCommonSymbols() { // created from the DSO. Otherwise, they become dangling references // that point to a non-existent DSO. static void demoteSharedSymbols() { - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { auto *s = dyn_cast<SharedSymbol>(sym); if (!s || s->getFile().isNeeded) - return; + continue; bool used = s->used; s->replace(Undefined{nullptr, s->getName(), STB_WEAK, s->stOther, s->type}); s->used = used; - }); + } } // The section referred to by `s` is considered address-significant. Set the @@ -1503,10 +1503,9 @@ static void findKeepUniqueSections(opt::InputArgList &args) { // Symbols in the dynsym could be address-significant in other executables // or DSOs, so we conservatively mark them as address-significant. - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (sym->includeInDynsym()) markAddrsig(sym); - }); // Visit the address-significance table in each object file and mark each // referenced symbol as address-significant. diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 6da409568c8b1..524d552b0b84d 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -145,12 +145,12 @@ BitcodeCompiler::BitcodeCompiler() { config->ltoPartitions); // Initialize usedStartStop. - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { StringRef s = sym->getName(); for (StringRef prefix : {"__start_", "__stop_"}) if (s.startswith(prefix)) usedStartStop.insert(s.substr(prefix.size())); - }); + } } BitcodeCompiler::~BitcodeCompiler() = default; diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index cebbd89168be5..a1561d2d41591 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -442,7 +442,7 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd) { } void LinkerScript::discard(InputSectionBase *s) { - if (s == in.shStrTab || s == mainPart->relaDyn || s == mainPart->relrDyn) + if (s == in.shStrTab || s == mainPart->relrDyn) error("discarding " + s->name + " section is not allowed"); // You can discard .hash and .gnu.hash sections by linker scripts. Since diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index 62fb8fe83a2ef..bb0105c289282 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -219,10 +219,9 @@ template <class ELFT> void MarkLive<ELFT>::run() { // Preserve externally-visible symbols if the symbols defined by this // file can interrupt other ELF file's symbols at runtime. - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (sym->includeInDynsym() && sym->partition == partition) markSymbol(sym); - }); // If this isn't the main partition, that's all that we need to preserve. if (partition != 1) { @@ -330,11 +329,10 @@ template <class ELFT> void markLive() { sec->markLive(); // If a DSO defines a symbol referenced in a regular object, it is needed.
- symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (auto *s = dyn_cast<SharedSymbol>(sym)) if (s->isUsedInRegularObj && !s->isWeak()) s->getFile().isNeeded = true; - }); return; } diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index cc051dba0e0aa..8d328626b85f9 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -272,7 +272,12 @@ template <class ELFT> void OutputSection::maybeCompress() { // Write section contents to a temporary buffer and compress it. std::vector<uint8_t> buf(size); writeTo(buf.data()); - if (Error e = zlib::compress(toStringRef(buf), compressedData)) + // We chose 1 as the default compression level because it is the fastest. If + // -O2 is given, we use level 6 to compress debug info more by ~15%. We found + // that level 7 to 9 doesn't make much difference (~1% more compression) while + // they take significant amount of time (~2x), so level 6 seems enough. + if (Error e = zlib::compress(toStringRef(buf), compressedData, + config->optimize >= 2 ? 6 : 1)) fatal("compress failed: " + llvm::toString(std::move(e))); // Update section headers. diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index a4fc1ffbd1e72..ea30662d38249 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -777,6 +777,14 @@ static const Symbol *getAlternativeSpelling(const Undefined &sym, return s; } + // Case mismatch, e.g. Foo vs FOO. + for (auto &it : map) + if (name.equals_lower(it.first)) + return it.second; + for (Symbol *sym : symtab->symbols()) + if (!sym->isUndefined() && name.equals_lower(sym->getName())) + return sym; + // The reference may be a mangled name while the definition is not. Suggest a // missing extern "C". if (name.startswith("_Z")) { @@ -799,10 +807,11 @@ static const Symbol *getAlternativeSpelling(const Undefined &sym, break; } if (!s) - symtab->forEachSymbol([&](Symbol *sym) { - if (!s && canSuggestExternCForCXX(name, sym->getName())) + for (Symbol *sym : symtab->symbols()) + if (canSuggestExternCForCXX(name, sym->getName())) { s = sym; - }); + break; + } if (s) { pre_hint = " to declare "; post_hint = " as extern \"C\"?"; @@ -1754,23 +1763,43 @@ static bool isThunkSectionCompatible(InputSection *source, return true; } +static int64_t getPCBias(RelType type) { + if (config->emachine != EM_ARM) + return 0; + switch (type) { + case R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + case R_ARM_THM_CALL: + return 4; + default: + return 8; + } +} + std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec, Relocation &rel, uint64_t src) { std::vector<Thunk *> *thunkVec = nullptr; + int64_t addend = rel.addend + getPCBias(rel.type); - // We use (section, offset) pair to find the thunk position if possible so - // that we create only one thunk for aliased symbols or ICFed sections. + // We use a ((section, offset), addend) pair to find the thunk position if + // possible so that we create only one thunk for aliased symbols or ICFed + // sections. There may be multiple relocations sharing the same (section, + // offset + addend) pair. We may revert the relocation back to its original + // non-Thunk target, so we cannot fold offset + addend.
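Aside (editorial): getPCBias above encodes the ARM pipeline bias -- branch displacements are measured from PC+8 in ARM state and PC+4 in Thumb -- which is why the range checks in this patch compare src against getVA(addend) plus the bias, and why thunks are now keyed on the addend as well. A toy model of such a range check, with illustrative constants (the getThunk hunk resumes below):

```cpp
// Toy model (constants illustrative): an ARM-state B/BL encodes a +/-32 MiB
// displacement measured from PC+8, so the range check applies the bias
// before comparing. Real checks also require 4-byte-aligned displacements.
#include <cstdint>
#include <cstdio>

static bool inBranchRange(uint64_t src, uint64_t dst) {
  const int64_t bias = 8;                 // ARM state; Thumb would use 4
  const int64_t range = 32 * 1024 * 1024; // +/-32 MiB span for B/BL
  int64_t displacement = (int64_t)(dst - src) - bias;
  return displacement >= -range && displacement < range;
}

int main() {
  uint64_t src = 0x10000;
  // Reachable: the +8 bias buys the last few bytes of the forward window.
  printf("%d\n", inBranchRange(src, src + 32 * 1024 * 1024));     // 1
  // One encoding step beyond PC+8+32MiB is out of range.
  printf("%d\n", inBranchRange(src, src + 32 * 1024 * 1024 + 8)); // 0
}
```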
if (auto *d = dyn_cast<Defined>(rel.sym)) if (!d->isInPlt() && d->section) - thunkVec = &thunkedSymbolsBySection[{d->section->repl, d->value}]; + thunkVec = &thunkedSymbolsBySectionAndAddend[{ + {d->section->repl, d->value}, addend}]; if (!thunkVec) - thunkVec = &thunkedSymbols[rel.sym]; + thunkVec = &thunkedSymbols[{rel.sym, addend}]; // Check existing Thunks for Sym to see if they can be reused for (Thunk *t : *thunkVec) if (isThunkSectionCompatible(isec, t->getThunkTargetSym()->section) && t->isCompatibleWith(*isec, rel) && - target->inBranchRange(rel.type, src, t->getThunkTargetSym()->getVA())) + target->inBranchRange(rel.type, src, + t->getThunkTargetSym()->getVA(rel.addend) + + getPCBias(rel.type))) return std::make_pair(t, false); // No existing compatible Thunk in range, create a new one @@ -1785,9 +1814,13 @@ std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec, // relocation back to its original non-Thunk target. bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) { if (Thunk *t = thunks.lookup(rel.sym)) { - if (target->inBranchRange(rel.type, src, rel.sym->getVA())) + if (target->inBranchRange(rel.type, src, + rel.sym->getVA(rel.addend) + getPCBias(rel.type))) return true; rel.sym = &t->destination; + // TODO Restore addend on all targets. + if (config->emachine == EM_AARCH64) + rel.addend = t->addend; if (rel.sym->isInPlt()) rel.expr = toPlt(rel.expr); } @@ -1843,7 +1876,7 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) { continue; if (!target->needsThunk(rel.expr, rel.type, isec->file, src, - *rel.sym)) + *rel.sym, rel.addend)) continue; Thunk *t; @@ -1865,9 +1898,13 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) { rel.sym = t->getThunkTargetSym(); rel.expr = fromPlt(rel.expr); + // On AArch64, a jump/call relocation may be encoded as STT_SECTION + // + non-zero addend, clear the addend after redirection. + // // The addend of R_PPC_PLTREL24 should be ignored after changing to // R_PC. - if (config->emachine == EM_PPC && rel.type == R_PPC_PLTREL24) + if (config->emachine == EM_AARCH64 || + (config->emachine == EM_PPC && rel.type == R_PPC_PLTREL24)) rel.addend = 0; } diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index befe15b8f3b9b..060c55e3086d8 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -150,10 +150,17 @@ class ThunkCreator { bool normalizeExistingThunk(Relocation &rel, uint64_t src); - // Record all the available Thunks for a Symbol - llvm::DenseMap<std::pair<SectionBase *, uint64_t>, std::vector<Thunk *>> - thunkedSymbolsBySection; - llvm::DenseMap<Symbol *, std::vector<Thunk *>> thunkedSymbols; + // Record all the available Thunks for a (Symbol, addend) pair, where Symbol + // is represented as a (section, offset) pair. There may be multiple + // relocations sharing the same (section, offset + addend) pair. We may revert + // a relocation back to its original non-Thunk target, and restore the + // original addend, so we cannot fold offset + addend. A nested pair is used + // because DenseMapInfo is not specialized for std::tuple. + llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>, + std::vector<Thunk *>> + thunkedSymbolsBySectionAndAddend; + llvm::DenseMap<std::pair<Symbol *, int64_t>, std::vector<Thunk *>> + thunkedSymbols; // Find a Thunk from the Thunks symbol definition, we can use this to find // the Thunk from a relocation to the Thunks symbol definition. diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index d3be0cb6450f9..507af8d2be75d 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -32,15 +32,19 @@ namespace elf { // add*() functions, which are called by input files as they are parsed.
There // is one add* function per symbol type. class SymbolTable { -public: - void wrap(Symbol *sym, Symbol *real, Symbol *wrap); + struct FilterOutPlaceholder { + bool operator()(Symbol *S) const { return !S->isPlaceholder(); } + }; + using iterator = llvm::filter_iterator<std::vector<Symbol *>::const_iterator, + FilterOutPlaceholder>; - void forEachSymbol(llvm::function_ref<void(Symbol *)> fn) { - for (Symbol *sym : symVector) - if (!sym->isPlaceholder()) - fn(sym); +public: + llvm::iterator_range<iterator> symbols() const { + return llvm::make_filter_range(symVector, FilterOutPlaceholder()); } + void wrap(Symbol *sym, Symbol *real, Symbol *wrap); + Symbol *insert(StringRef name); Symbol *addSymbol(const Symbol &newSym); diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 40cc92faf7bb9..5bf7949dab53f 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -298,8 +298,8 @@ static size_t getHashSize() { // sets is empty, or some input files didn't have .note.gnu.property sections), // we don't create this section. GnuPropertySection::GnuPropertySection() - : SyntheticSection(llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, 4, - ".note.gnu.property") {} + : SyntheticSection(llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, + config->wordsize, ".note.gnu.property") {} void GnuPropertySection::writeTo(uint8_t *buf) { uint32_t featureAndType = config->emachine == EM_AARCH64 diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 024e0cfec27b5..e1e99556ec7b0 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -130,7 +130,8 @@ int64_t TargetInfo::getImplicitAddend(const uint8_t *buf, RelType type) const { bool TargetInfo::usesOnlyLowPageBits(RelType type) const { return false; } bool TargetInfo::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, + int64_t a) const { return false; } diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 39b999176717f..9d147ed7b1f30 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -58,7 +58,7 @@ class TargetInfo { // targeting S. virtual bool needsThunk(RelExpr expr, RelType relocType, const InputFile *file, uint64_t branchAddr, - const Symbol &s) const; + const Symbol &s, int64_t a) const; // On systems with range extensions we place collections of Thunks at // regular spacings that enable the majority of branches reach the Thunks. diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index d135177860439..8d2cdba616a68 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -49,7 +49,7 @@ namespace { // AArch64 long range Thunks class AArch64ABSLongThunk final : public Thunk { public: - AArch64ABSLongThunk(Symbol &dest) : Thunk(dest) {} + AArch64ABSLongThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} uint32_t size() override { return 16; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -57,7 +57,7 @@ class AArch64ABSLongThunk final : public Thunk { class AArch64ADRPThunk final : public Thunk { public: - AArch64ADRPThunk(Symbol &dest) : Thunk(dest) {} + AArch64ADRPThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} uint32_t size() override { return 12; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -73,7 +73,7 @@ class AArch64ADRPThunk final : public Thunk { // if the target is in range, otherwise it creates a long thunk.
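Aside (editorial): the SymbolTable.h hunk above replaces the forEachSymbol callback with a symbols() range built via llvm::make_filter_range; that is what let the earlier hunks in this patch rewrite callbacks into plain loops with break/continue. A self-contained stand-in for the filtered-range idea, without the LLVM dependency (the Thunks.cpp hunk resumes below):

```cpp
// Minimal filtered range, illustrative only: skips elements rejected by the
// predicate, so callers can use ordinary range-for with break/continue.
#include <iostream>
#include <string>
#include <vector>

struct Sym {
  std::string name;
  bool placeholder;
};

template <typename It, typename Pred> class FilterRange {
  It first, last;
  Pred pred;

public:
  struct iterator {
    It cur, last;
    const Pred *pred;
    void skip() { while (cur != last && !(*pred)(*cur)) ++cur; }
    iterator &operator++() { ++cur; skip(); return *this; }
    decltype(auto) operator*() const { return *cur; }
    bool operator!=(const iterator &o) const { return cur != o.cur; }
  };
  FilterRange(It f, It l, Pred p) : first(f), last(l), pred(p) {}
  iterator begin() const { iterator it{first, last, &pred}; it.skip(); return it; }
  iterator end() const { return {last, last, &pred}; }
};

int main() {
  std::vector<Sym> table{{"a", false}, {"b", true}, {"c", false}};
  auto notPlaceholder = [](const Sym &s) { return !s.placeholder; };
  FilterRange range(table.begin(), table.end(), notPlaceholder);
  for (const Sym &s : range) {
    if (s.name == "c")
      break;                     // early exit: awkward with a callback API
    std::cout << s.name << "\n"; // prints "a"; "b" is filtered out
  }
}
```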
class ARMThunk : public Thunk { public: - ARMThunk(Symbol &dest) : Thunk(dest) {} + ARMThunk(Symbol &dest) : Thunk(dest, 0) {} bool getMayUseShortThunk(); uint32_t size() override { return getMayUseShortThunk() ? 4 : sizeLong(); } @@ -103,7 +103,7 @@ class ARMThunk : public Thunk { // which has a range of 16MB. class ThumbThunk : public Thunk { public: - ThumbThunk(Symbol &dest) : Thunk(dest) { alignment = 2; } + ThumbThunk(Symbol &dest) : Thunk(dest, 0) { alignment = 2; } bool getMayUseShortThunk(); uint32_t size() override { return getMayUseShortThunk() ? 4 : sizeLong(); } @@ -209,7 +209,7 @@ class ThumbV6MPILongThunk final : public ThumbThunk { // MIPS LA25 thunk class MipsThunk final : public Thunk { public: - MipsThunk(Symbol &dest) : Thunk(dest) {} + MipsThunk(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 16; } void writeTo(uint8_t *buf) override; @@ -220,7 +220,7 @@ class MipsThunk final : public Thunk { // microMIPS R2-R5 LA25 thunk class MicroMipsThunk final : public Thunk { public: - MicroMipsThunk(Symbol &dest) : Thunk(dest) {} + MicroMipsThunk(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 14; } void writeTo(uint8_t *buf) override; @@ -231,7 +231,7 @@ class MicroMipsThunk final : public Thunk { // microMIPS R6 LA25 thunk class MicroMipsR6Thunk final : public Thunk { public: - MicroMipsR6Thunk(Symbol &dest) : Thunk(dest) {} + MicroMipsR6Thunk(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 12; } void writeTo(uint8_t *buf) override; @@ -241,8 +241,11 @@ class MicroMipsR6Thunk final : public Thunk { class PPC32PltCallStub final : public Thunk { public: - PPC32PltCallStub(const InputSection &isec, const Relocation &rel, Symbol &dest) - : Thunk(dest), addend(rel.type == R_PPC_PLTREL24 ? rel.addend : 0), + // For R_PPC_PLTREL24, Thunk::addend records the addend which will be used to + // decide the offsets in the call stub. + PPC32PltCallStub(const InputSection &isec, const Relocation &rel, + Symbol &dest) + : Thunk(dest, rel.type == R_PPC_PLTREL24 ? rel.addend : 0), file(isec.file) {} uint32_t size() override { return 16; } void writeTo(uint8_t *buf) override; @@ -250,10 +253,6 @@ class PPC32PltCallStub final : public Thunk { bool isCompatibleWith(const InputSection &isec, const Relocation &rel) const override; private: - // For R_PPC_PLTREL24, this records the addend, which will be used to decide - // the offsets in the call stub. - uint32_t addend; - // Records the call site of the call stub. const InputFile *file; }; @@ -268,7 +267,7 @@ class PPC32PltCallStub final : public Thunk { // 3) Transferring control to the target function through an indirect branch. class PPC64PltCallStub final : public Thunk { public: - PPC64PltCallStub(Symbol &dest) : Thunk(dest) {} + PPC64PltCallStub(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 20; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -289,7 +288,7 @@ class PPC64LongBranchThunk : public Thunk { void addSymbols(ThunkSection &isec) override; protected: - PPC64LongBranchThunk(Symbol &dest) : Thunk(dest) {} + PPC64LongBranchThunk(Symbol &dest) : Thunk(dest, 0) {} }; class PPC64PILongBranchThunk final : public PPC64LongBranchThunk { @@ -332,8 +331,8 @@ void Thunk::setOffset(uint64_t newOffset) { // AArch64 long range Thunks -static uint64_t getAArch64ThunkDestVA(const Symbol &s) { - uint64_t v = s.isInPlt() ? 
s.getPltVA() : s.getVA(); +static uint64_t getAArch64ThunkDestVA(const Symbol &s, int64_t a) { + uint64_t v = s.isInPlt() ? s.getPltVA() : s.getVA(a); return v; } @@ -344,7 +343,7 @@ void AArch64ABSLongThunk::writeTo(uint8_t *buf) { 0x00, 0x00, 0x00, 0x00, // L0: .xword S 0x00, 0x00, 0x00, 0x00, }; - uint64_t s = getAArch64ThunkDestVA(destination); + uint64_t s = getAArch64ThunkDestVA(destination, addend); memcpy(buf, data, sizeof(data)); target->relocateOne(buf + 8, R_AARCH64_ABS64, s); } @@ -367,7 +366,7 @@ void AArch64ADRPThunk::writeTo(uint8_t *buf) { 0x10, 0x02, 0x00, 0x91, // add x16, x16, R_AARCH64_ADD_ABS_LO12_NC(Dest) 0x00, 0x02, 0x1f, 0xd6, // br x16 }; - uint64_t s = getAArch64ThunkDestVA(destination); + uint64_t s = getAArch64ThunkDestVA(destination, addend); uint64_t p = getThunkTargetSym()->getVA(); memcpy(buf, data, sizeof(data)); target->relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, @@ -795,16 +794,16 @@ void PPC64LongBranchThunk::addSymbols(ThunkSection &isec) { isec); } -Thunk::Thunk(Symbol &d) : destination(d), offset(0) {} +Thunk::Thunk(Symbol &d, int64_t a) : destination(d), addend(a), offset(0) {} Thunk::~Thunk() = default; -static Thunk *addThunkAArch64(RelType type, Symbol &s) { +static Thunk *addThunkAArch64(RelType type, Symbol &s, int64_t a) { if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) fatal("unrecognized relocation type"); if (config->picThunk) - return make<AArch64ADRPThunk>(s); - return make<AArch64ABSLongThunk>(s); + return make<AArch64ADRPThunk>(s, a); + return make<AArch64ABSLongThunk>(s, a); } // Creates a thunk for Thumb-ARM interworking. @@ -895,7 +894,8 @@ static Thunk *addThunkMips(RelType type, Symbol &s) { return make<MipsThunk>(s); } -static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel, Symbol &s) { +static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel, + Symbol &s) { assert((rel.type == R_PPC_REL24 || rel.type == R_PPC_PLTREL24) && "unexpected relocation type for thunk"); return make<PPC32PltCallStub>(isec, rel, s); } @@ -914,9 +914,10 @@ static Thunk *addThunkPPC64(RelType type, Symbol &s) { Thunk *addThunk(const InputSection &isec, Relocation &rel) { Symbol &s = *rel.sym; + int64_t a = rel.addend; if (config->emachine == EM_AARCH64) - return addThunkAArch64(rel.type, s); + return addThunkAArch64(rel.type, s, a); if (config->emachine == EM_ARM) return addThunkArm(rel.type, s); diff --git a/lld/ELF/Thunks.h b/lld/ELF/Thunks.h index 2d27ee5f6c38e..891bf8e5e4348 100644 --- a/lld/ELF/Thunks.h +++ b/lld/ELF/Thunks.h @@ -27,7 +27,7 @@ class ThunkSection; // Thunks are assigned to synthetic ThunkSections class Thunk { public: - Thunk(Symbol &destination); + Thunk(Symbol &destination, int64_t addend); virtual ~Thunk(); virtual uint32_t size() = 0; @@ -55,11 +55,12 @@ class Thunk { Defined *getThunkTargetSym() const { return syms[0]; } - // The alignment requirement for this Thunk, defaults to the size of the - // typical code section alignment. Symbol &destination; + int64_t addend; llvm::SmallVector<Defined *, 3> syms; uint64_t offset = 0; + // The alignment requirement for this Thunk, defaults to the size of the + // typical code section alignment. uint32_t alignment = 4; }; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 3de1230150d64..ab59d0365085a 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1238,10 +1238,9 @@ static DenseMap<const InputSectionBase *, int> buildSectionOrder() { // We want both global and local symbols. We get the global ones from the // symbol table and iterate the object files for the local ones.
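Aside (editorial): the Thunks.cpp hunk above threads the relocation addend into the AArch64 thunks so a thunk materializes S + A instead of the bare symbol address. A toy model of the getAArch64ThunkDestVA arithmetic, with made-up values and a simplified Symbol (the Writer.cpp hunk resumes below):

```cpp
// Toy model, illustrative only: the thunk destination is the symbol VA plus
// the relocation addend, except for PLT-bound calls where the addend is
// irrelevant.
#include <cstdint>
#include <cstdio>

struct Symbol {
  uint64_t va;
  bool inPlt;
  uint64_t pltVa;
  // Mirrors the shape of lld's Symbol::getVA(addend): add A to the final VA.
  uint64_t getVA(int64_t a) const { return va + a; }
};

static uint64_t getThunkDestVA(const Symbol &s, int64_t a) {
  return s.inPlt ? s.pltVa : s.getVA(a);
}

int main() {
  Symbol section{0x200000, false, 0};
  // An STT_SECTION relocation "section + 0x40" must thunk to 0x200040; a
  // thunk keyed only on the symbol would wrongly land at 0x200000.
  printf("0x%llx\n", (unsigned long long)getThunkDestVA(section, 0x40));
}
```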
- symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (!sym->isLazy()) addSym(*sym); - }); for (InputFile *file : objectFiles) for (Symbol *sym : file->getSymbols()) @@ -1734,8 +1733,8 @@ template void Writer::finalizeSections() { for (Partition &part : partitions) finalizeSynthetic(part.ehFrame); - symtab->forEachSymbol( - [](Symbol *s) { s->isPreemptible = computeIsPreemptible(*s); }); + for (Symbol *sym : symtab->symbols()) + sym->isPreemptible = computeIsPreemptible(*sym); // Change values of linker-script-defined symbols from placeholders (assigned // by declareSymbols) to actual definitions. @@ -1769,19 +1768,18 @@ template void Writer::finalizeSections() { return symtab->soNames.count(needed); }); - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (sym->isUndefined() && !sym->isWeak()) if (auto *f = dyn_cast_or_null(sym->file)) if (f->allNeededIsKnown) error(toString(f) + ": undefined reference to " + toString(*sym)); - }); } // Now that we have defined all possible global symbols including linker- // synthesized ones. Visit all symbols to give the finishing touches. - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { if (!includeInSymtab(*sym)) - return; + continue; if (in.symTab) in.symTab->addSymbol(sym); @@ -1791,7 +1789,7 @@ template void Writer::finalizeSections() { if (file->isNeeded && !sym->isUndefined()) addVerneed(sym); } - }); + } // We also need to scan the dynamic relocation tables of the other partitions // and add any referenced symbols to the partition's dynsym. @@ -2190,6 +2188,9 @@ std::vector Writer::createPhdrs(Partition &part) { if (config->zWxneeded) addHdr(PT_OPENBSD_WXNEEDED, PF_X); + if (OutputSection *cmd = findSection(".note.gnu.property", partNo)) + addHdr(PT_GNU_PROPERTY, PF_R)->add(cmd); + // Create one PT_NOTE per a group of contiguous SHT_NOTE sections with the // same alignment. PhdrEntry *note = nullptr; diff --git a/lld/test/ELF/aarch64-feature-bti.s b/lld/test/ELF/aarch64-feature-bti.s index f2889c6fcc92b..0fa1cf83727bd 100644 --- a/lld/test/ELF/aarch64-feature-bti.s +++ b/lld/test/ELF/aarch64-feature-bti.s @@ -55,28 +55,30 @@ # BTIDYN: 0x0000000070000001 (AARCH64_BTI_PLT) # BTIDYN-NOT: 0x0000000070000003 (AARCH64_PAC_PLT) -# BTISO: 0000000000010310 func2: -# BTISO-NEXT: 10310: bl #48 -# BTISO-NEXT: 10314: ret +# BTISO: 0000000000010348 func2: +# BTISO-NEXT: 10348: bl #56 +# BTISO-NEXT: ret +# BTISO: 0000000000010350 func3: +# BTISO-NEXT: 10350: ret # BTISO: Disassembly of section .plt: -# BTISO: 0000000000010320 .plt: -# BTISO-NEXT: 10320: bti c -# BTISO-NEXT: 10324: stp x16, x30, [sp, #-16]! -# BTISO-NEXT: 10328: adrp x16, #131072 -# BTISO-NEXT: 1032c: ldr x17, [x16, #1072] -# BTISO-NEXT: 10330: add x16, x16, #1072 -# BTISO-NEXT: 10334: br x17 -# BTISO-NEXT: 10338: nop -# BTISO-NEXT: 1033c: nop -# BTISO: 0000000000010340 func3@plt: -# BTISO-NEXT: 10340: adrp x16, #131072 -# BTISO-NEXT: 10344: ldr x17, [x16, #1080] -# BTISO-NEXT: 10348: add x16, x16, #1080 -# BTISO-NEXT: 1034c: br x17 +# BTISO: 0000000000010360 .plt: +# BTISO-NEXT: 10360: bti c +# BTISO-NEXT: stp x16, x30, [sp, #-16]! 
+# BTISO-NEXT: adrp x16, #131072 +# BTISO-NEXT: ldr x17, [x16, #1136] +# BTISO-NEXT: add x16, x16, #1136 +# BTISO-NEXT: br x17 +# BTISO-NEXT: nop +# BTISO-NEXT: nop +# BTISO: 0000000000010380 func3@plt: +# BTISO-NEXT: 10380: adrp x16, #131072 +# BTISO-NEXT: ldr x17, [x16, #1144] +# BTISO-NEXT: add x16, x16, #1144 +# BTISO-NEXT: br x17 # SOGOTPLT2: Hex dump of section '.got.plt' -# SOGOTPLT2-NEXT: 0x00030420 00000000 00000000 00000000 00000000 -# SOGOTPLT2-NEXT: 0x00030430 00000000 00000000 20030100 00000000 +# SOGOTPLT2-NEXT: 0x00030460 00000000 00000000 00000000 00000000 +# SOGOTPLT2-NEXT: 0x00030470 00000000 00000000 60030100 00000000 ## Build an executable with all relocatable inputs having the BTI ## .note.gnu.property. We expect a bti c in front of all PLT entries as the @@ -89,26 +91,26 @@ # RUN: llvm-objdump -d -mattr=+bti --no-show-raw-insn %t.exe | FileCheck --check-prefix=EXECBTI %s # EXECBTI: Disassembly of section .text: -# EXECBTI: 0000000000210310 func1: -# EXECBTI-NEXT: 210310: bl #48 -# EXECBTI-NEXT: 210314: ret +# EXECBTI: 0000000000210348 func1: +# EXECBTI-NEXT: 210348: bl #40 +# EXECBTI-NEXT: ret # EXECBTI: Disassembly of section .plt: -# EXECBTI: 0000000000210320 .plt: -# EXECBTI-NEXT: 210320: bti c -# EXECBTI-NEXT: 210324: stp x16, x30, [sp, #-16]! -# EXECBTI-NEXT: 210328: adrp x16, #131072 -# EXECBTI-NEXT: 21032c: ldr x17, [x16, #1112] -# EXECBTI-NEXT: 210330: add x16, x16, #1112 -# EXECBTI-NEXT: 210334: br x17 -# EXECBTI-NEXT: 210338: nop -# EXECBTI-NEXT: 21033c: nop -# EXECBTI: 0000000000210340 func2@plt: -# EXECBTI-NEXT: 210340: bti c -# EXECBTI-NEXT: 210344: adrp x16, #131072 -# EXECBTI-NEXT: 210348: ldr x17, [x16, #1120] -# EXECBTI-NEXT: 21034c: add x16, x16, #1120 -# EXECBTI-NEXT: 210350: br x17 -# EXECBTI-NEXT: 210354: nop +# EXECBTI: 0000000000210350 .plt: +# EXECBTI-NEXT: 210350: bti c +# EXECBTI-NEXT: stp x16, x30, [sp, #-16]! +# EXECBTI-NEXT: adrp x16, #131072 +# EXECBTI-NEXT: ldr x17, [x16, #1160] +# EXECBTI-NEXT: add x16, x16, #1160 +# EXECBTI-NEXT: br x17 +# EXECBTI-NEXT: nop +# EXECBTI-NEXT: nop +# EXECBTI: 0000000000210370 func2@plt: +# EXECBTI-NEXT: 210370: bti c +# EXECBTI-NEXT: adrp x16, #131072 +# EXECBTI-NEXT: ldr x17, [x16, #1168] +# EXECBTI-NEXT: add x16, x16, #1168 +# EXECBTI-NEXT: br x17 +# EXECBTI-NEXT: nop ## We expect the same for PIE, as the address of an ifunc can escape # RUN: ld.lld --pie %t.o %t.so %t2.so -o %tpie.exe @@ -117,26 +119,26 @@ # RUN: llvm-objdump -d -mattr=+bti --no-show-raw-insn %tpie.exe | FileCheck --check-prefix=PIE %s # PIE: Disassembly of section .text: -# PIE: 0000000000010310 func1: -# PIE-NEXT: 10310: bl #48 -# PIE-NEXT: 10314: ret +# PIE: 0000000000010348 func1: +# PIE-NEXT: 10348: bl #40 +# PIE-NEXT: ret # PIE: Disassembly of section .plt: -# PIE: 0000000000010320 .plt: -# PIE-NEXT: 10320: bti c -# PIE-NEXT: 10324: stp x16, x30, [sp, #-16]! -# PIE-NEXT: 10328: adrp x16, #131072 -# PIE-NEXT: 1032c: ldr x17, [x16, #1112] -# PIE-NEXT: 10330: add x16, x16, #1112 -# PIE-NEXT: 10334: br x17 -# PIE-NEXT: 10338: nop -# PIE-NEXT: 1033c: nop -# PIE: 0000000000010340 func2@plt: -# PIE-NEXT: 10340: bti c -# PIE-NEXT: 10344: adrp x16, #131072 -# PIE-NEXT: 10348: ldr x17, [x16, #1120] -# PIE-NEXT: 1034c: add x16, x16, #1120 -# PIE-NEXT: 10350: br x17 -# PIE-NEXT: 10354: nop +# PIE: 0000000000010350 .plt: +# PIE-NEXT: 10350: bti c +# PIE-NEXT: stp x16, x30, [sp, #-16]! 
+# PIE-NEXT: adrp x16, #131072 +# PIE-NEXT: ldr x17, [x16, #1160] +# PIE-NEXT: add x16, x16, #1160 +# PIE-NEXT: br x17 +# PIE-NEXT: nop +# PIE-NEXT: nop +# PIE: 0000000000010370 func2@plt: +# PIE-NEXT: 10370: bti c +# PIE-NEXT: adrp x16, #131072 +# PIE-NEXT: ldr x17, [x16, #1168] +# PIE-NEXT: add x16, x16, #1168 +# PIE-NEXT: br x17 +# PIE-NEXT: nop ## Build and executable with not all relocatable inputs having the BTI ## .note.property, expect no bti c and no .note.gnu.property entry @@ -148,24 +150,24 @@ # NOEX: Disassembly of section .text: # NOEX: 00000000002102e0 func1: # NOEX-NEXT: 2102e0: bl #48 -# NOEX-NEXT: 2102e4: ret +# NOEX-NEXT: ret # NOEX: 00000000002102e8 func3: # NOEX-NEXT: 2102e8: ret # NOEX: Disassembly of section .plt: # NOEX: 00000000002102f0 .plt: # NOEX-NEXT: 2102f0: stp x16, x30, [sp, #-16]! -# NOEX-NEXT: 2102f4: adrp x16, #131072 -# NOEX-NEXT: 2102f8: ldr x17, [x16, #1024] -# NOEX-NEXT: 2102fc: add x16, x16, #1024 -# NOEX-NEXT: 210300: br x17 -# NOEX-NEXT: 210304: nop -# NOEX-NEXT: 210308: nop -# NOEX-NEXT: 21030c: nop +# NOEX-NEXT: adrp x16, #131072 +# NOEX-NEXT: ldr x17, [x16, #1024] +# NOEX-NEXT: add x16, x16, #1024 +# NOEX-NEXT: br x17 +# NOEX-NEXT: nop +# NOEX-NEXT: nop +# NOEX-NEXT: nop # NOEX: 0000000000210310 func2@plt: # NOEX-NEXT: 210310: adrp x16, #131072 -# NOEX-NEXT: 210314: ldr x17, [x16, #1032] -# NOEX-NEXT: 210318: add x16, x16, #1032 -# NOEX-NEXT: 21031c: br x17 +# NOEX-NEXT: ldr x17, [x16, #1032] +# NOEX-NEXT: add x16, x16, #1032 +# NOEX-NEXT: br x17 ## Force BTI entries with the --force-bti command line option. Expect a warning ## from the file without the .note.gnu.property. @@ -180,28 +182,28 @@ # RUN: llvm-objdump -d -mattr=+bti --no-show-raw-insn %tforcebti.exe | FileCheck --check-prefix=FORCE %s # FORCE: Disassembly of section .text: -# FORCE: 0000000000210338 func1: -# FORCE-NEXT: 210338: bl #56 -# FORCE-NEXT: 21033c: ret -# FORCE: 0000000000210340 func3: -# FORCE-NEXT: 210340: ret +# FORCE: 0000000000210370 func1: +# FORCE-NEXT: 210370: bl #48 +# FORCE-NEXT: ret +# FORCE: 0000000000210378 func3: +# FORCE-NEXT: 210378: ret # FORCE: Disassembly of section .plt: -# FORCE: 0000000000210350 .plt: -# FORCE-NEXT: 210350: bti c -# FORCE-NEXT: 210354: stp x16, x30, [sp, #-16]! -# FORCE-NEXT: 210358: adrp x16, #131072 -# FORCE-NEXT: 21035c: ldr x17, [x16, #1144] -# FORCE-NEXT: 210360: add x16, x16, #1144 -# FORCE-NEXT: 210364: br x17 -# FORCE-NEXT: 210368: nop -# FORCE-NEXT: 21036c: nop -# FORCE: 0000000000210370 func2@plt: -# FORCE-NEXT: 210370: bti c -# FORCE-NEXT: 210374: adrp x16, #131072 -# FORCE-NEXT: 210378: ldr x17, [x16, #1152] -# FORCE-NEXT: 21037c: add x16, x16, #1152 -# FORCE-NEXT: 210380: br x17 -# FORCE-NEXT: 210384: nop +# FORCE: 0000000000210380 .plt: +# FORCE-NEXT: 210380: bti c +# FORCE-NEXT: stp x16, x30, [sp, #-16]! 
+# FORCE-NEXT: adrp x16, #131072 +# FORCE-NEXT: ldr x17, [x16, #1192] +# FORCE-NEXT: add x16, x16, #1192 +# FORCE-NEXT: br x17 +# FORCE-NEXT: nop +# FORCE-NEXT: nop +# FORCE: 00000000002103a0 func2@plt: +# FORCE-NEXT: 2103a0: bti c +# FORCE-NEXT: adrp x16, #131072 +# FORCE-NEXT: ldr x17, [x16, #1200] +# FORCE-NEXT: add x16, x16, #1200 +# FORCE-NEXT: br x17 +# FORCE-NEXT: nop .section ".note.gnu.property", "a" .long 4 diff --git a/lld/test/ELF/aarch64-feature-btipac.s b/lld/test/ELF/aarch64-feature-btipac.s index c1fa4c1d3b289..30e00b2dbbd81 100644 --- a/lld/test/ELF/aarch64-feature-btipac.s +++ b/lld/test/ELF/aarch64-feature-btipac.s @@ -15,28 +15,28 @@ # RUN: llvm-readelf --dynamic-table %t.so | FileCheck --check-prefix BTIPACDYN %s # BTIPACSO: Disassembly of section .text: -# BTIPACSO: 0000000000010310 func2: -# BTIPACSO-NEXT: 10310: bl #48 -# BTIPACSO-NEXT: 10314: ret -# BTIPACSO: 0000000000010318 func3: -# BTIPACSO-NEXT: 10318: ret +# BTIPACSO: 0000000000010348 func2: +# BTIPACSO-NEXT: 10348: bl #56 +# BTIPACSO-NEXT: ret +# BTIPACSO: 0000000000010350 func3: +# BTIPACSO-NEXT: 10350: ret # BTIPACSO: Disassembly of section .plt: -# BTIPACSO: 0000000000010320 .plt: -# BTIPACSO-NEXT: 10320: bti c -# BTIPACSO-NEXT: 10324: stp x16, x30, [sp, #-16]! -# BTIPACSO-NEXT: 10328: adrp x16, #131072 -# BTIPACSO-NEXT: 1032c: ldr x17, [x16, #1096] -# BTIPACSO-NEXT: 10330: add x16, x16, #1096 -# BTIPACSO-NEXT: 10334: br x17 -# BTIPACSO-NEXT: 10338: nop -# BTIPACSO-NEXT: 1033c: nop -# BTIPACSO: 0000000000010340 func3@plt: -# BTIPACSO-NEXT: 10340: adrp x16, #131072 -# BTIPACSO-NEXT: 10344: ldr x17, [x16, #1104] -# BTIPACSO-NEXT: 10348: add x16, x16, #1104 -# BTIPACSO-NEXT: 1034c: autia1716 -# BTIPACSO-NEXT: 10350: br x17 -# BTIPACSO-NEXT: 10354: nop +# BTIPACSO: 0000000000010360 .plt: +# BTIPACSO-NEXT: 10360: bti c +# BTIPACSO-NEXT: stp x16, x30, [sp, #-16]! +# BTIPACSO-NEXT: adrp x16, #131072 +# BTIPACSO-NEXT: ldr x17, [x16, #1160] +# BTIPACSO-NEXT: add x16, x16, #1160 +# BTIPACSO-NEXT: br x17 +# BTIPACSO-NEXT: nop +# BTIPACSO-NEXT: nop +# BTIPACSO: 0000000000010380 func3@plt: +# BTIPACSO-NEXT: 10380: adrp x16, #131072 +# BTIPACSO-NEXT: ldr x17, [x16, #1168] +# BTIPACSO-NEXT: add x16, x16, #1168 +# BTIPACSO-NEXT: autia1716 +# BTIPACSO-NEXT: br x17 +# BTIPACSO-NEXT: nop # BTIPACPROP: Properties: aarch64 feature: BTI, PAC @@ -53,29 +53,29 @@ # RUN: llvm-readelf --dynamic-table %t.exe | FileCheck --check-prefix BTIPACDYN %s # BTIPACEX: Disassembly of section .text: -# BTIPACEX: 0000000000210338 func1: -# BTIPACEX-NEXT: 210338: bl #56 -# BTIPACEX-NEXT: 21033c: ret -# BTIPACEX-NEXT: 210340: ret -# BTIPACEX: 0000000000210344 func3: -# BTIPACEX-NEXT: 210344: ret +# BTIPACEX: 0000000000210370 func1: +# BTIPACEX-NEXT: 210370: bl #48 +# BTIPACEX-NEXT: ret +# BTIPACEX-NEXT: ret +# BTIPACEX: 000000000021037c func3: +# BTIPACEX-NEXT: 21037c: ret # BTIPACEX: Disassembly of section .plt: -# BTIPACEX: 0000000000210350 .plt: -# BTIPACEX-NEXT: 210350: bti c -# BTIPACEX-NEXT: 210354: stp x16, x30, [sp, #-16]! 
-# BTIPACEX-NEXT: 210358: adrp x16, #131072 -# BTIPACEX-NEXT: 21035c: ldr x17, [x16, #1160] -# BTIPACEX-NEXT: 210360: add x16, x16, #1160 -# BTIPACEX-NEXT: 210364: br x17 -# BTIPACEX-NEXT: 210368: nop -# BTIPACEX-NEXT: 21036c: nop -# BTIPACEX: 0000000000210370 func2@plt: -# BTIPACEX-NEXT: 210370: bti c -# BTIPACEX-NEXT: 210374: adrp x16, #131072 -# BTIPACEX-NEXT: 210378: ldr x17, [x16, #1168] -# BTIPACEX-NEXT: 21037c: add x16, x16, #1168 -# BTIPACEX-NEXT: 210380: autia1716 -# BTIPACEX-NEXT: 210384: br x17 +# BTIPACEX: 0000000000210380 .plt: +# BTIPACEX-NEXT: 210380: bti c +# BTIPACEX-NEXT: stp x16, x30, [sp, #-16]! +# BTIPACEX-NEXT: adrp x16, #131072 +# BTIPACEX-NEXT: ldr x17, [x16, #1208] +# BTIPACEX-NEXT: add x16, x16, #1208 +# BTIPACEX-NEXT: br x17 +# BTIPACEX-NEXT: nop +# BTIPACEX-NEXT: nop +# BTIPACEX: 00000000002103a0 func2@plt: +# BTIPACEX-NEXT: 2103a0: bti c +# BTIPACEX-NEXT: adrp x16, #131072 +# BTIPACEX-NEXT: ldr x17, [x16, #1216] +# BTIPACEX-NEXT: add x16, x16, #1216 +# BTIPACEX-NEXT: autia1716 +# BTIPACEX-NEXT: br x17 ## Check that combinations of BTI+PAC with 0 properties results in standard PLT @@ -86,25 +86,25 @@ # EX: Disassembly of section .text: # EX: 00000000002102e0 func1: # EX-NEXT: 2102e0: bl #48 -# EX-NEXT: 2102e4: ret -# EX-NEXT: 2102e8: ret +# EX-NEXT: ret +# EX-NEXT: ret # EX: 00000000002102ec func3: # EX-NEXT: 2102ec: ret # EX: Disassembly of section .plt: # EX: 00000000002102f0 .plt: # EX-NEXT: 2102f0: stp x16, x30, [sp, #-16]! -# EX-NEXT: 2102f4: adrp x16, #131072 -# EX-NEXT: 2102f8: ldr x17, [x16, #1024] -# EX-NEXT: 2102fc: add x16, x16, #1024 -# EX-NEXT: 210300: br x17 -# EX-NEXT: 210304: nop -# EX-NEXT: 210308: nop -# EX-NEXT: 21030c: nop +# EX-NEXT: adrp x16, #131072 +# EX-NEXT: ldr x17, [x16, #1024] +# EX-NEXT: add x16, x16, #1024 +# EX-NEXT: br x17 +# EX-NEXT: nop +# EX-NEXT: nop +# EX-NEXT: nop # EX: 0000000000210310 func2@plt: # EX: 210310: adrp x16, #131072 -# EX-NEXT: 210314: ldr x17, [x16, #1032] -# EX-NEXT: 210318: add x16, x16, #1032 -# EX-NEXT: 21031c: br x17 +# EX-NEXT: ldr x17, [x16, #1032] +# EX-NEXT: add x16, x16, #1032 +# EX-NEXT: br x17 # NODYN-NOT: 0x0000000070000001 (AARCH64_BTI_PLT) # NODYN-NOT: 0x0000000070000003 (AARCH64_PAC_PLT) diff --git a/lld/test/ELF/aarch64-feature-pac.s b/lld/test/ELF/aarch64-feature-pac.s index cb0bcee70a8a8..7a4f8ee64ffdb 100644 --- a/lld/test/ELF/aarch64-feature-pac.s +++ b/lld/test/ELF/aarch64-feature-pac.s @@ -15,22 +15,22 @@ # NOPAC: 00000000000102b8 func2: # NOPAC-NEXT: 102b8: bl #56 -# NOPAC-NEXT: 102bc: ret +# NOPAC-NEXT: ret # NOPAC: Disassembly of section .plt: # NOPAC: 00000000000102d0 .plt: # NOPAC-NEXT: 102d0: stp x16, x30, [sp, #-16]! 
-# NOPAC-NEXT: 102d4: adrp x16, #131072 -# NOPAC-NEXT: 102d8: ldr x17, [x16, #960] -# NOPAC-NEXT: 102dc: add x16, x16, #960 -# NOPAC-NEXT: 102e0: br x17 -# NOPAC-NEXT: 102e4: nop -# NOPAC-NEXT: 102e8: nop -# NOPAC-NEXT: 102ec: nop +# NOPAC-NEXT: adrp x16, #131072 +# NOPAC-NEXT: ldr x17, [x16, #960] +# NOPAC-NEXT: add x16, x16, #960 +# NOPAC-NEXT: br x17 +# NOPAC-NEXT: nop +# NOPAC-NEXT: nop +# NOPAC-NEXT: nop # NOPAC: 00000000000102f0 func3@plt: # NOPAC-NEXT: 102f0: adrp x16, #131072 -# NOPAC-NEXT: 102f4: ldr x17, [x16, #968] -# NOPAC-NEXT: 102f8: add x16, x16, #968 -# NOPAC-NEXT: 102fc: br x17 +# NOPAC-NEXT: ldr x17, [x16, #968] +# NOPAC-NEXT: add x16, x16, #968 +# NOPAC-NEXT: br x17 # NOPACDYN-NOT: 0x0000000070000001 (AARCH64_BTI_PLT) # NOPACDYN-NOT: 0x0000000070000003 (AARCH64_PAC_PLT) @@ -44,34 +44,36 @@ ## PAC has no effect on PLT[0], for PLT[N] autia1716 is used to authenticate ## the address in x17 (context in x16) before branching to it. The dynamic ## loader is responsible for calling pacia1716 on the entry. -# PACSO: 0000000000010310 func2: -# PACSO-NEXT: 10310: bl #48 -# PACSO-NEXT: 10314: ret +# PACSO: 0000000000010348 func2: +# PACSO-NEXT: 10348: bl #56 +# PACSO-NEXT: ret +# PACSO: 0000000000010350 func3: +# PACSO-NEXT: 10350: ret # PACSO: Disassembly of section .plt: -# PACSO: 0000000000010320 .plt: -# PACSO-NEXT: 10320: stp x16, x30, [sp, #-16]! -# PACSO-NEXT: 10324: adrp x16, #131072 -# PACSO-NEXT: 10328: ldr x17, [x16, #1080] -# PACSO-NEXT: 1032c: add x16, x16, #1080 -# PACSO-NEXT: 10330: br x17 -# PACSO-NEXT: 10334: nop -# PACSO-NEXT: 10338: nop -# PACSO-NEXT: 1033c: nop -# PACSO: 0000000000010340 func3@plt: -# PACSO-NEXT: 10340: adrp x16, #131072 -# PACSO-NEXT: 10344: ldr x17, [x16, #1088] -# PACSO-NEXT: 10348: add x16, x16, #1088 -# PACSO-NEXT: 1034c: autia1716 -# PACSO-NEXT: 10350: br x17 -# PACSO-NEXT: 10354: nop +# PACSO: 0000000000010360 .plt: +# PACSO-NEXT: 10360: stp x16, x30, [sp, #-16]! +# PACSO-NEXT: adrp x16, #131072 +# PACSO-NEXT: ldr x17, [x16, #1144] +# PACSO-NEXT: add x16, x16, #1144 +# PACSO-NEXT: br x17 +# PACSO-NEXT: nop +# PACSO-NEXT: nop +# PACSO-NEXT: nop +# PACSO: 0000000000010380 func3@plt: +# PACSO-NEXT: 10380: adrp x16, #131072 +# PACSO-NEXT: ldr x17, [x16, #1152] +# PACSO-NEXT: add x16, x16, #1152 +# PACSO-NEXT: autia1716 +# PACSO-NEXT: br x17 +# PACSO-NEXT: nop # SOGOTPLT: Hex dump of section '.got.plt': # SOGOTPLT-NEXT: 0x000303b0 00000000 00000000 00000000 00000000 # SOGOTPLT-NEXT: 0x000303c0 00000000 00000000 d0020100 00000000 # SOGOTPLT2: Hex dump of section '.got.plt': -# SOGOTPLT2-NEXT: 0x00030428 00000000 00000000 00000000 00000000 -# SOGOTPLT2-NEXT: 0x00030438 00000000 00000000 20030100 00000000 +# SOGOTPLT2-NEXT: 0x00030468 00000000 00000000 00000000 00000000 +# SOGOTPLT2-NEXT: 0x00030478 00000000 00000000 60030100 00000000 # PACPROP: Properties: aarch64 feature: PAC @@ -89,29 +91,28 @@ # RUN: llvm-objdump -d -mattr=+v8.3a --no-show-raw-insn %tpacplt.exe | FileCheck --check-prefix PACPLT %s # PACPLT: Disassembly of section .text: -# PACPLT: 0000000000210338 func1: -# PACPLT-NEXT: 210338: bl #56 -# PACPLT-NEXT: 21033c: ret -# PACPLT: 0000000000210340 func3: -# PACPLT-NEXT: 210340: ret +# PACPLT: 0000000000210370 func1: +# PACPLT-NEXT: 210370: bl #48 +# PACPLT-NEXT: ret +# PACPLT: 0000000000210378 func3: +# PACPLT-NEXT: 210378: ret # PACPLT: Disassembly of section .plt: -# PACPLT: 0000000000210350 .plt: -# PACPLT-NEXT: 210350: stp x16, x30, [sp, #-16]! 
-# PACPLT-NEXT: 210354: adrp x16, #131072 -# PACPLT-NEXT: 210358: ldr x17, [x16, #1144] -# PACPLT-NEXT: 21035c: add x16, x16, #1144 -# PACPLT-NEXT: 210360: br x17 -# PACPLT-NEXT: 210364: nop -# PACPLT-NEXT: 210368: nop -# PACPLT-NEXT: 21036c: nop -# PACPLT: 0000000000210370 func2@plt: -# PACPLT-NEXT: 210370: adrp x16, #131072 -# PACPLT-NEXT: 210374: ldr x17, [x16, #1152] -# PACPLT-NEXT: 210378: add x16, x16, #1152 -# PACPLT-NEXT: 21037c: autia1716 -# PACPLT-NEXT: 210380: br x17 -# PACPLT-NEXT: 210384: nop - +# PACPLT: 0000000000210380 .plt: +# PACPLT-NEXT: 210380: stp x16, x30, [sp, #-16]! +# PACPLT-NEXT: adrp x16, #131072 +# PACPLT-NEXT: ldr x17, [x16, #1192] +# PACPLT-NEXT: add x16, x16, #1192 +# PACPLT-NEXT: br x17 +# PACPLT-NEXT: nop +# PACPLT-NEXT: nop +# PACPLT-NEXT: nop +# PACPLT: 00000000002103a0 func2@plt: +# PACPLT-NEXT: 2103a0: adrp x16, #131072 +# PACPLT-NEXT: ldr x17, [x16, #1200] +# PACPLT-NEXT: add x16, x16, #1200 +# PACPLT-NEXT: autia1716 +# PACPLT-NEXT: br x17 +# PACPLT-NEXT: nop .section ".note.gnu.property", "a" .long 4 diff --git a/lld/test/ELF/aarch64-ifunc-bti.s b/lld/test/ELF/aarch64-ifunc-bti.s index 6a50b317ca3d2..70369d3e9f818 100644 --- a/lld/test/ELF/aarch64-ifunc-bti.s +++ b/lld/test/ELF/aarch64-ifunc-bti.s @@ -4,37 +4,37 @@ # RUN: ld.lld --shared --soname=t1.so %t1.o -o %t1.so # RUN: ld.lld --pie %t1.so %t.o -o %t -# RUN: llvm-objdump -d -mattr=+bti -triple=aarch64-linux-gnu %t | FileCheck %s +# RUN: llvm-objdump -d --no-show-raw-insn -mattr=+bti -triple=aarch64-linux-gnu %t | FileCheck %s # When the address of an ifunc is taken using a non-got reference which clang # can do, LLD exports a canonical PLT entry that may have its address taken so # we must use bti c. # CHECK: Disassembly of section .plt: -# CHECK: 0000000000010340 .plt: -# CHECK-NEXT: 10340: 5f 24 03 d5 bti c -# CHECK-NEXT: 10344: f0 7b bf a9 stp x16, x30, [sp, #-16]! -# CHECK-NEXT: 10348: 10 01 00 90 adrp x16, #131072 -# CHECK-NEXT: 1034c: 11 5e 42 f9 ldr x17, [x16, #1208] -# CHECK-NEXT: 10350: 10 e2 12 91 add x16, x16, #1208 -# CHECK-NEXT: 10354: 20 02 1f d6 br x17 -# CHECK-NEXT: 10358: 1f 20 03 d5 nop -# CHECK-NEXT: 1035c: 1f 20 03 d5 nop -# CHECK: 0000000000010360 func1@plt: -# CHECK-NEXT: 10360: 5f 24 03 d5 bti c -# CHECK-NEXT: 10364: 10 01 00 90 adrp x16, #131072 -# CHECK-NEXT: 10368: 11 62 42 f9 ldr x17, [x16, #1216] -# CHECK-NEXT: 1036c: 10 02 13 91 add x16, x16, #1216 -# CHECK-NEXT: 10370: 20 02 1f d6 br x17 -# CHECK-NEXT: 10374: 1f 20 03 d5 nop +# CHECK: 0000000000010380 .plt: +# CHECK-NEXT: 10380: bti c +# CHECK-NEXT: stp x16, x30, [sp, #-16]! +# CHECK-NEXT: adrp x16, #131072 +# CHECK-NEXT: ldr x17, [x16, #1272] +# CHECK-NEXT: add x16, x16, #1272 +# CHECK-NEXT: br x17 +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK: 00000000000103a0 func1@plt: +# CHECK-NEXT: 103a0: bti c +# CHECK-NEXT: adrp x16, #131072 +# CHECK-NEXT: ldr x17, [x16, #1280] +# CHECK-NEXT: add x16, x16, #1280 +# CHECK-NEXT: br x17 +# CHECK-NEXT: nop # CHECK-NEXT: ... 
-# CHECK: 0000000000010380 myfunc: -# CHECK-NEXT: 10380: 5f 24 03 d5 bti c -# CHECK-NEXT: 10384: 10 01 00 90 adrp x16, #131072 -# CHECK-NEXT: 10388: 11 66 42 f9 ldr x17, [x16, #1224] -# CHECK-NEXT: 1038c: 10 22 13 91 add x16, x16, #1224 -# CHECK-NEXT: 10390: 20 02 1f d6 br x17 -# CHECK-NEXT: 10394: 1f 20 03 d5 nop +# CHECK: 00000000000103c0 myfunc: +# CHECK-NEXT: 103c0: bti c +# CHECK-NEXT: adrp x16, #131072 +# CHECK-NEXT: ldr x17, [x16, #1288] +# CHECK-NEXT: add x16, x16, #1288 +# CHECK-NEXT: br x17 +# CHECK-NEXT: nop .section ".note.gnu.property", "a" .long 4 diff --git a/lld/test/ELF/aarch64-thunk-pi.s b/lld/test/ELF/aarch64-thunk-pi.s index 965a93764a730..2545f8fb2ea18 100644 --- a/lld/test/ELF/aarch64-thunk-pi.s +++ b/lld/test/ELF/aarch64-thunk-pi.s @@ -16,28 +16,36 @@ low_target: bl high_target ret // CHECK: low_target: -// CHECK-NEXT: d8: bl #0x10 <__AArch64ADRPThunk_high_target> +// CHECK-NEXT: d8: bl #0x18 <__AArch64ADRPThunk_high_target> // CHECK-NEXT: ret .hidden low_target2 .globl low_target2 .type low_target2, %function low_target2: - // Need thunk to high_target + // Need thunk to high_target2 bl high_target2 + // .text_high+8 = high_target2 + bl .text_high+8 ret // CHECK: low_target2: -// CHECK-NEXT: e0: bl #0x14 <__AArch64ADRPThunk_high_target2> +// CHECK-NEXT: e0: bl #0x1c <__AArch64ADRPThunk_high_target2> +// CHECK-NEXT: e4: bl #0x24 <__AArch64ADRPThunk_> // CHECK-NEXT: ret // Expect range extension thunks for .text_low // adrp calculation is (PC + signed immediate) & (!0xfff) // CHECK: __AArch64ADRPThunk_high_target: -// CHECK-NEXT: e8: adrp x16, #0x10000000 +// CHECK-NEXT: f0: adrp x16, #0x10000000 // CHECK-NEXT: add x16, x16, #0x40 // CHECK-NEXT: br x16 // CHECK: __AArch64ADRPThunk_high_target2: -// CHECK-NEXT: f4: adrp x16, #0x10000000 +// CHECK-NEXT: fc: adrp x16, #0x10000000 +// CHECK-NEXT: add x16, x16, #0x8 +// CHECK-NEXT: br x16 +/// Identical to the previous one, but for the target .text_high+8. 
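The extra __AArch64ADRPThunk_ checked in this test is the point of the change: with addends recorded on thunks, a call to .text_high+8 can no longer share the thunk created for a call to the bare symbol, because the destination address is computed from both. A rough sketch of the computation, following the getAArch64ThunkDestVA change earlier in this patch:

#include <cstdint>

struct Symbol {
  bool isInPlt() const { return inPlt; }
  uint64_t getPltVA() const { return pltVA; }
  // The addend offsets the returned address, e.g. .text_high+8.
  uint64_t getVA(int64_t addend) const { return va + addend; }
  bool inPlt = false;
  uint64_t pltVA = 0;
  uint64_t va = 0;
};

// The destination depends on the relocation addend as well as the symbol,
// so bl .text_high and bl .text_high+8 resolve to two distinct thunks.
static uint64_t getThunkDestVA(const Symbol &s, int64_t a) {
  return s.isInPlt() ? s.getPltVA() : s.getVA(a);
}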
+// CHECK: __AArch64ADRPThunk_: +// CHECK-NEXT: 108: adrp x16, #0x10000000 // CHECK-NEXT: add x16, x16, #0x8 // CHECK-NEXT: br x16 diff --git a/lld/test/ELF/aarch64-thunk-script.s b/lld/test/ELF/aarch64-thunk-script.s index cf8187dd5bb35..176c137223b29 100644 --- a/lld/test/ELF/aarch64-thunk-script.s +++ b/lld/test/ELF/aarch64-thunk-script.s @@ -15,6 +15,8 @@ _start: // Need thunk to high_target@plt bl high_target + // Need thunk to .text_high+4 + bl .text_high+4 ret .section .text_high, "ax", %progbits @@ -28,14 +30,21 @@ high_target: // CHECK: Disassembly of section .text_low: // CHECK-EMPTY: // CHECK-NEXT: _start: -// CHECK-NEXT: 2000: bl #0x8 <__AArch64AbsLongThunk_high_target> +// CHECK-NEXT: 2000: bl #0x10 <__AArch64AbsLongThunk_high_target> +// CHECK-NEXT: 2004: bl #0x1c <__AArch64AbsLongThunk_> // CHECK-NEXT: ret // CHECK: __AArch64AbsLongThunk_high_target: -// CHECK-NEXT: 2008: ldr x16, #0x8 +// CHECK-NEXT: 2010: ldr x16, #0x8 // CHECK-NEXT: br x16 // CHECK: $d: -// CHECK-NEXT: 2010: 00 20 00 08 .word 0x08002000 -// CHECK-NEXT: 2014: 00 00 00 00 .word 0x00000000 +// CHECK-NEXT: 2018: 00 20 00 08 .word 0x08002000 +// CHECK-NEXT: 201c: 00 00 00 00 .word 0x00000000 +// CHECK: __AArch64AbsLongThunk_: +// CHECK-NEXT: 2020: ldr x16, #0x8 +// CHECK-NEXT: 2024: br x16 +// CHECK: $d: +// CHECK-NEXT: 2028: 04 20 00 08 .word 0x08002004 +// CHECK-NEXT: 202c: 00 00 00 00 .word 0x00000000 // CHECK: Disassembly of section .text_high: // CHECK-EMPTY: // CHECK-NEXT: high_target: diff --git a/lld/test/ELF/compressed-debug-level.test b/lld/test/ELF/compressed-debug-level.test new file mode 100644 index 0000000000000..d755e9fedf13b --- /dev/null +++ b/lld/test/ELF/compressed-debug-level.test @@ -0,0 +1,38 @@ +# REQUIRES: x86, zlib + +# RUN: yaml2obj %s -o %t.o + +# RUN: ld.lld %t.o -o %t.default --compress-debug-sections=zlib +# RUN: llvm-readelf --sections %t.default | FileCheck -check-prefixes=HEADER,LEVEL1 %s + +# RUN: ld.lld -O0 %t.o -o %t.O0 --compress-debug-sections=zlib +# RUN: llvm-readelf --sections %t.O0 | FileCheck -check-prefixes=HEADER,LEVEL1 %s +# RUN: cmp %t.default %t.O0 + +# RUN: ld.lld -O1 %t.o -o %t.O1 --compress-debug-sections=zlib +# RUN: llvm-readelf --sections %t.O1 | FileCheck -check-prefixes=HEADER,LEVEL1 %s +# RUN: cmp %t.default %t.O1 + +# RUN: ld.lld -O2 %t.o -o %t.O2 --compress-debug-sections=zlib +# RUN: llvm-readelf --sections %t.O2 | FileCheck -check-prefixes=HEADER,LEVEL6 %s + +## LLD uses zlib compression of level 1 when -O0, -O1 and level 6 when -O2. +## Here we check how -O flag affects the size of compressed sections produced. + +# HEADER: [Nr] Name Type Address Off Size +# LEVEL1: [ 1] .debug_info PROGBITS 00000000 000094 00001c +# LEVEL6: [ 1] .debug_info PROGBITS 00000000 000094 00001a + +## A little arbitrary debug section which has a different size after +## applying compression of level 1 and 6. 
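The -O mapping this test pins down is easy to reproduce directly with zlib; the sketch below is illustrative and only assumes zlib's public compress2/compressBound API, not lld's actual compression path. The YAML input for the test follows.

#include <zlib.h>
#include <cstdio>
#include <vector>

// lld picks zlib level 1 (fastest) for -O0/-O1 and level 6 (zlib's
// default, smaller output) for -O2; the same 12-byte payload as the
// test compresses to different sizes at the two levels.
static int compressionLevel(int optimize) { return optimize >= 2 ? 6 : 1; }

int main() {
  const unsigned char data[] = {1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1};
  for (int opt : {0, 2}) {
    uLongf destLen = compressBound(sizeof(data));
    std::vector<unsigned char> out(destLen);
    if (compress2(out.data(), &destLen, data, sizeof(data),
                  compressionLevel(opt)) == Z_OK)
      std::printf("-O%d: %zu -> %lu bytes\n", opt, sizeof(data), destLen);
  }
}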
+ +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_386 +Sections: + - Name: .debug_info + Type: SHT_PROGBITS + Content: '010101010101010201010201' diff --git a/lld/test/ELF/gnu-property-align-32.s b/lld/test/ELF/gnu-property-align-32.s new file mode 100644 index 0000000000000..8022a49d34c6c --- /dev/null +++ b/lld/test/ELF/gnu-property-align-32.s @@ -0,0 +1,40 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=i686-linux-gnu %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readobj --sections -n %t | FileCheck %s + +## Check that .note.gnu.property has alignment 4 and is readable by llvm-readobj + +# CHECK: Name: .note.gnu.property +# CHECK-NEXT: Type: SHT_NOTE (0x7) +# CHECK-NEXT: Flags [ (0x2) +# CHECK-NEXT: SHF_ALLOC (0x2) +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x4000F4 +# CHECK-NEXT: Offset: 0xF4 +# CHECK-NEXT: Size: 28 +# CHECK-NEXT: Link: 0 +# CHECK-NEXT: Info: 0 +# CHECK-NEXT: AddressAlignment: 4 + +# CHECK: Note { +# CHECK-NEXT: Owner: GNU +# CHECK-NEXT: Data size: 0xC +# CHECK-NEXT: Type: NT_GNU_PROPERTY_TYPE_0 (property note) +# CHECK-NEXT: Property [ +# CHECK-NEXT: x86 feature: IBT + +.section ".note.gnu.property", "a" +.p2align 2 +.long 4 +.long 0xc +.long 0x5 +.asciz "GNU" +.p2align 2 +.long 0xc0000002 # GNU_PROPERTY_X86_FEATURE_1_AND +.long 4 +.long 1 # GNU_PROPERTY_X86_FEATURE_1_IBT + +.text +.globl _start + ret diff --git a/lld/test/ELF/gnu-property-align.s b/lld/test/ELF/gnu-property-align.s new file mode 100644 index 0000000000000..b109c09039a2c --- /dev/null +++ b/lld/test/ELF/gnu-property-align.s @@ -0,0 +1,42 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-linux-gnu %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readobj --sections -n %t | FileCheck %s + +## Check that .note.gnu.property has alignment 8 and is readable by llvm-readobj + +# CHECK: Name: .note.gnu.property +# CHECK-NEXT: Type: SHT_NOTE (0x7) +# CHECK-NEXT: Flags [ (0x2) +# CHECK-NEXT: SHF_ALLOC (0x2) +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x200190 +# CHECK-NEXT: Offset: 0x190 +# CHECK-NEXT: Size: 32 +# CHECK-NEXT: Link: 0 +# CHECK-NEXT: Info: 0 +# CHECK-NEXT: AddressAlignment: 8 + +# CHECK: Note { +# CHECK-NEXT: Owner: GNU +# CHECK-NEXT: Data size: 0x10 +# CHECK-NEXT: Type: NT_GNU_PROPERTY_TYPE_0 (property note) +# CHECK-NEXT: Property [ +# CHECK-NEXT: x86 feature: IBT + + +.section ".note.gnu.property", "a" +.long 4 +.long 0x10 +.long 0x5 +.asciz "GNU" + +.long 0xc0000002 # GNU_PROPERTY_X86_FEATURE_1_AND +.long 4 +.long 1 # GNU_PROPERTY_X86_FEATURE_1_IBT +.long 0 + + .text + .globl _start + .type _start, %function +_start: ret diff --git a/lld/test/ELF/linkerscript/discard-section-err.s b/lld/test/ELF/linkerscript/discard-section-err.s index bb77dbb087da3..dd3c666e115b1 100644 --- a/lld/test/ELF/linkerscript/discard-section-err.s +++ b/lld/test/ELF/linkerscript/discard-section-err.s @@ -20,8 +20,19 @@ # RUN: ld.lld -pie -o %t --script %t.script %t.o # RUN: echo "SECTIONS { /DISCARD/ : { *(.rela.dyn) } }" > %t.script -# RUN: not ld.lld -pie -o %t --script %t.script %t.o 2>&1 | \ -# RUN: FileCheck -check-prefix=RELADYN %s -# RELADYN: discarding .rela.dyn section is not allowed +# RUN: ld.lld -pie -o %t %t.o +# RUN: llvm-readobj -S %t | FileCheck --check-prefix=RELADYN %s +# RELADYN: Name: .rela.dyn +# RUN: ld.lld -pie -o %t --script %t.script %t.o +# RUN: llvm-readobj -S %t | FileCheck /dev/null --implicit-check-not='Name: .rela.dyn' + +# RUN: echo "SECTIONS { /DISCARD/ : { *(.relr.dyn) } }" > %t.script +# RUN: not ld.lld -pie 
--pack-dyn-relocs=relr -o %t --script %t.script %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=RELRDYN %s +# RELRDYN: discarding .relr.dyn section is not allowed -.comm foo,4,4 +.data +.align 8 +foo: +## Emits an R_X86_64_RELATIVE in -pie mode. +.quad foo diff --git a/lld/test/ELF/pt-gnu-property.s b/lld/test/ELF/pt-gnu-property.s new file mode 100644 index 0000000000000..5758967b0e0b5 --- /dev/null +++ b/lld/test/ELF/pt-gnu-property.s @@ -0,0 +1,45 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-linux-gnu %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readobj --sections --program-headers %t | FileCheck %s + +## Test that we generate the PT_GNU_PROPERTY segment type that describes the +## .note.gnu.property if it is present. + +# CHECK: Name: .note.gnu.property +# CHECK-NEXT: Type: SHT_NOTE (0x7) +# CHECK-NEXT: Flags [ (0x2) +# CHECK-NEXT: SHF_ALLOC (0x2) +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x200190 +# CHECK-NEXT: Offset: 0x190 +# CHECK-NEXT: Size: 32 +# CHECK-NEXT: Link: 0 +# CHECK-NEXT: Info: 0 +# CHECK-NEXT: AddressAlignment: 8 + +# CHECK: Type: PT_GNU_PROPERTY (0x6474E553) +# CHECK-NEXT: Offset: 0x190 +# CHECK-NEXT: VirtualAddress: 0x200190 +# CHECK-NEXT: PhysicalAddress: 0x200190 +# CHECK-NEXT: FileSize: 32 +# CHECK-NEXT: MemSize: 32 +# CHECK-NEXT: Flags [ (0x4) +# CHECK-NEXT: PF_R (0x4) +# CHECK-NEXT: ] +# CHECK-NEXT: Alignment: 8 + +.section ".note.gnu.property", "a" +.long 4 +.long 0x10 +.long 0x5 +.asciz "GNU" + +.long 0xc0000002 # GNU_PROPERTY_X86_FEATURE_1_AND +.long 4 +.long 1 # GNU_PROPERTY_X86_FEATURE_1_IBT +.long 0 + +.text +.globl _start + ret diff --git a/lld/test/ELF/undef-spell-corrector.s b/lld/test/ELF/undef-spell-corrector.s index 174c8009cba8d..3ad2421a6cd63 100644 --- a/lld/test/ELF/undef-spell-corrector.s +++ b/lld/test/ELF/undef-spell-corrector.s @@ -63,6 +63,16 @@ # CONST-NEXT: >>> referenced by {{.*}} # CONST-NEXT: >>> did you mean: foo(int const*) +## Case mismatch. 
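The new spell-corrector cases below feed the linker a reference that differs from the definition only in case. One plausible building block for such a suggestion, shown as an illustrative sketch rather than lld's actual matcher (which ranks candidates by edit distance):

#include <algorithm>
#include <cctype>
#include <string>

// True when two symbol names differ only in letter case, e.g.
// _Z3FOOPKi vs _Z3fooPKi, a natural trigger for "did you mean".
static bool equalsIgnoringCase(const std::string &a, const std::string &b) {
  return a.size() == b.size() &&
         std::equal(a.begin(), a.end(), b.begin(), [](char x, char y) {
           return std::tolower((unsigned char)x) ==
                  std::tolower((unsigned char)y);
         });
}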
+# RUN: echo 'call _Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o +# RUN: not ld.lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s +# RUN: echo '_Z3fooPKi: call _Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o +# RUN: not ld.lld %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s + +# CASE: error: undefined symbol: FOO(int const*) +# CASE-NEXT: >>> referenced by {{.*}} +# CASE-NEXT: >>> did you mean: foo(int const*) + .globl _start, abcde, _Z3fooPKi _start: abcde: diff --git a/lld/test/ELF/verdef-defaultver.s b/lld/test/ELF/verdef-defaultver.s index 3c10f2dcfe26f..7d2a0d27fa11d 100644 --- a/lld/test/ELF/verdef-defaultver.s +++ b/lld/test/ELF/verdef-defaultver.s @@ -84,6 +84,7 @@ # DSO-NEXT: Index: 1 # DSO-NEXT: Hash: 127830196 # DSO-NEXT: Name: shared +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -92,6 +93,7 @@ # DSO-NEXT: Index: 2 # DSO-NEXT: Hash: 1425 # DSO-NEXT: Name: V1 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -100,6 +102,7 @@ # DSO-NEXT: Index: 3 # DSO-NEXT: Hash: 1426 # DSO-NEXT: Name: V2 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: ] diff --git a/lld/test/ELF/verdef-dependency.s b/lld/test/ELF/verdef-dependency.s index 479f332d49306..d716436202535 100644 --- a/lld/test/ELF/verdef-dependency.s +++ b/lld/test/ELF/verdef-dependency.s @@ -15,6 +15,7 @@ # DSO-NEXT: Index: 1 # DSO-NEXT: Hash: 127830196 # DSO-NEXT: Name: shared +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -23,6 +24,7 @@ # DSO-NEXT: Index: 2 # DSO-NEXT: Hash: 98457184 # DSO-NEXT: Name: LIBSAMPLE_1.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -31,6 +33,7 @@ # DSO-NEXT: Index: 3 # DSO-NEXT: Hash: 98456416 # DSO-NEXT: Name: LIBSAMPLE_2.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -39,5 +42,6 @@ # DSO-NEXT: Index: 4 # DSO-NEXT: Hash: 98456672 # DSO-NEXT: Name: LIBSAMPLE_3.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: ] diff --git a/lld/test/ELF/verdef.s b/lld/test/ELF/verdef.s index d2aa924782f86..dd1f1d41f0148 100644 --- a/lld/test/ELF/verdef.s +++ b/lld/test/ELF/verdef.s @@ -33,6 +33,7 @@ # DSO-NEXT: Index: 1 # DSO-NEXT: Hash: 127830196 # DSO-NEXT: Name: shared +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -41,6 +42,7 @@ # DSO-NEXT: Index: 2 # DSO-NEXT: Hash: 98457184 # DSO-NEXT: Name: LIBSAMPLE_1.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -49,6 +51,7 @@ # DSO-NEXT: Index: 3 # DSO-NEXT: Hash: 98456416 # DSO-NEXT: Name: LIBSAMPLE_2.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -57,6 +60,7 @@ # DSO-NEXT: Index: 4 # DSO-NEXT: Hash: 98456672 # DSO-NEXT: Name: LIBSAMPLE_3.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: ] # DSO-NEXT: VersionRequirements [ diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index 81d7dd8123bd0..e66fa49a51142 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -119,26 +119,20 @@ if (NOT LLDB_DISABLE_PYTHON) "${lldb_scripts_dir}/lldb.py" "${lldb_python_build_path}/__init__.py") - if(APPLE) - SET(lldb_python_heap_dir "${lldb_python_build_path}/macosx/heap") - add_custom_command(TARGET finish_swig POST_BUILD VERBATIM - COMMAND ${CMAKE_COMMAND} -E make_directory ${lldb_python_heap_dir} - COMMAND 
${CMAKE_COMMAND} -E copy - "${LLDB_SOURCE_DIR}/examples/darwin/heap_find/heap/heap_find.cpp" - "${LLDB_SOURCE_DIR}/examples/darwin/heap_find/heap/Makefile" - ${lldb_python_heap_dir}) - endif() - - function(create_python_package target pkg_dir) - cmake_parse_arguments(ARG "" "" "FILES" ${ARGN}) + function(create_python_package pkg_dir) + cmake_parse_arguments(ARG "NOINIT" "" "FILES" ${ARGN}) if(ARG_FILES) set(copy_cmd COMMAND ${CMAKE_COMMAND} -E copy ${ARG_FILES} ${pkg_dir}) endif() - add_custom_command(TARGET ${target} POST_BUILD VERBATIM + if(NOT ARG_NOINIT) + set(init_cmd COMMAND ${PYTHON_EXECUTABLE} + "${LLDB_SOURCE_DIR}/scripts/Python/createPythonInit.py" + "${pkg_dir}" ${ARG_FILES}) + endif() + add_custom_command(TARGET finish_swig POST_BUILD VERBATIM COMMAND ${CMAKE_COMMAND} -E make_directory ${pkg_dir} ${copy_cmd} - COMMAND ${PYTHON_EXECUTABLE} "${LLDB_SOURCE_DIR}/scripts/Python/createPythonInit.py" - ${pkg_dir} ${ARG_FILES} + ${init_cmd} WORKING_DIRECTORY ${lldb_python_build_path}) endfunction() @@ -146,28 +140,33 @@ if (NOT LLDB_DISABLE_PYTHON) COMMAND ${CMAKE_COMMAND} -E copy "${LLDB_SOURCE_DIR}/source/Interpreter/embedded_interpreter.py" ${lldb_python_build_path}) - create_python_package(finish_swig "formatters/cpp" + # Distribute the examples as python packages. + create_python_package("formatters/cpp" FILES "${LLDB_SOURCE_DIR}/examples/synthetic/gnu_libstdcpp.py" "${LLDB_SOURCE_DIR}/examples/synthetic/libcxx.py") - # Make an empty __init__.py in lldb/runtime as this is required for - # Python to recognize lldb.runtime as a valid package (and hence, - # lldb.runtime.objc as a valid contained package) - create_python_package(finish_swig "runtime") - # Having these files copied here ensure that lldb/formatters is a - # valid package itself - create_python_package(finish_swig "formatters" + + create_python_package("formatters" FILES "${LLDB_SOURCE_DIR}/examples/summaries/cocoa/cache.py" "${LLDB_SOURCE_DIR}/examples/summaries/synth.py" "${LLDB_SOURCE_DIR}/examples/summaries/cocoa/metrics.py" "${LLDB_SOURCE_DIR}/examples/summaries/cocoa/attrib_fromdict.py" "${LLDB_SOURCE_DIR}/examples/summaries/cocoa/Logger.py") - create_python_package(finish_swig "utils" - FILES "${LLDB_SOURCE_DIR}/examples/python/symbolication.py") + + create_python_package("utils" + FILES "${LLDB_SOURCE_DIR}/examples/python/in_call_stack.py" + "${LLDB_SOURCE_DIR}/examples/python/symbolication.py") + if(APPLE) - create_python_package(finish_swig "macosx" + create_python_package("macosx" FILES "${LLDB_SOURCE_DIR}/examples/python/crashlog.py" "${LLDB_SOURCE_DIR}/examples/darwin/heap_find/heap.py") - create_python_package(finish_swig "diagnose" + + create_python_package("macosx/heap" + FILES "${LLDB_SOURCE_DIR}/examples/darwin/heap_find/heap/heap_find.cpp" + "${LLDB_SOURCE_DIR}/examples/darwin/heap_find/heap/Makefile" + NOINIT) + + create_python_package("diagnose" FILES "${LLDB_SOURCE_DIR}/examples/python/diagnose_unwind.py" "${LLDB_SOURCE_DIR}/examples/python/diagnose_nsstring.py") endif() diff --git a/lldb/docs/lldb-gdb-remote.txt b/lldb/docs/lldb-gdb-remote.txt index e3f11488df640..06cd09d77c412 100644 --- a/lldb/docs/lldb-gdb-remote.txt +++ b/lldb/docs/lldb-gdb-remote.txt @@ -790,6 +790,13 @@ distribution_id: optional. For linux, specifies distribution id (e.g. ubuntu, fe osmajor: optional, specifies the major version number of the OS (e.g. for macOS 10.12.2, it would be 10) osminor: optional, specifies the minor version number of the OS (e.g. 
for macOS 10.12.2, it would be 12)
ospatch: optional, specifies the patch level number of the OS (e.g. for macOS 10.12.2, it would be 2)
+addressing_bits: optional, specifies how many bits in addresses are
+                 significant for addressing, base 10. If bits 38..0
+                 in a 64-bit pointer are significant for addressing,
+                 then the value is 39. This is needed on e.g. AArch64
+                 v8.3 ABIs that use pointer authentication, so lldb
+                 knows which bits to clear/set to get the actual
+                 addresses.

 //----------------------------------------------------------------------
 // "qGDBServerVersion"
diff --git a/lldb/docs/use/map.rst b/lldb/docs/use/map.rst
index d878b5633e83f..3c6c6e6ffc620 100644
--- a/lldb/docs/use/map.rst
+++ b/lldb/docs/use/map.rst
@@ -880,6 +880,20 @@ Examining Variables
+
+Print an array of integers in memory, assuming we have a pointer like "int *ptr".
+
+    (gdb) p *ptr@10
+
+    (lldb) parray 10 ptr
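Given addressing_bits, the debugger can mask a pointer down to its significant bits before using it as an address. A minimal sketch of that masking, illustrative rather than lldb's actual implementation:

#include <cstdint>

// addressing_bits == 39 means bits 38..0 address memory; the higher bits
// may carry a pointer authentication code and must be cleared.
static uint64_t stripNonAddressableBits(uint64_t ptr, unsigned addressingBits) {
  if (addressingBits == 0 || addressingBits >= 64)
    return ptr; // no mask communicated, or all bits significant
  uint64_t mask = (UINT64_C(1) << addressingBits) - 1;
  return ptr & mask;
}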
+ + + diff --git a/lldb/include/lldb/API/SBReproducer.h b/lldb/include/lldb/API/SBReproducer.h index 93e567607aa85..8bb530a0fe429 100644 --- a/lldb/include/lldb/API/SBReproducer.h +++ b/lldb/include/lldb/API/SBReproducer.h @@ -20,7 +20,7 @@ class LLDB_API SBReproducer { public: static const char *Capture(); static const char *Capture(const char *path); - static const char *Replay(const char *path); + static const char *Replay(const char *path, bool skip_version_check = false); static const char *GetPath(); static bool Generate(); }; diff --git a/lldb/include/lldb/Breakpoint/BreakpointList.h b/lldb/include/lldb/Breakpoint/BreakpointList.h index 110e8d41f36b5..ad68151fefc78 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointList.h +++ b/lldb/include/lldb/Breakpoint/BreakpointList.h @@ -67,8 +67,10 @@ class BreakpointList { /// The breakpoint name for which to search. /// /// \result - /// \bfalse if the input name was not a legal breakpoint name. - bool FindBreakpointsByName(const char *name, BreakpointList &matching_bps); + /// error if the input name was not a legal breakpoint name, vector + /// of breakpoints otherwise. + llvm::Expected> + FindBreakpointsByName(const char *name); /// Returns the number of elements in this breakpoint list. /// diff --git a/lldb/include/lldb/Breakpoint/BreakpointOptions.h b/lldb/include/lldb/Breakpoint/BreakpointOptions.h index 9e02afff5227e..2c52170eb9f6a 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointOptions.h +++ b/lldb/include/lldb/Breakpoint/BreakpointOptions.h @@ -88,7 +88,8 @@ friend class Breakpoint; explicit CommandBaton(std::unique_ptr Data) : TypedBaton(std::move(Data)) {} - void GetDescription(Stream *s, lldb::DescriptionLevel level) const override; + void GetDescription(llvm::raw_ostream &s, lldb::DescriptionLevel level, + unsigned indentation) const override; }; typedef std::shared_ptr CommandBatonSP; diff --git a/lldb/include/lldb/Breakpoint/WatchpointOptions.h b/lldb/include/lldb/Breakpoint/WatchpointOptions.h index b395dde21901e..0dc34d4ebef73 100644 --- a/lldb/include/lldb/Breakpoint/WatchpointOptions.h +++ b/lldb/include/lldb/Breakpoint/WatchpointOptions.h @@ -180,7 +180,8 @@ class WatchpointOptions { CommandBaton(std::unique_ptr Data) : TypedBaton(std::move(Data)) {} - void GetDescription(Stream *s, lldb::DescriptionLevel level) const override; + void GetDescription(llvm::raw_ostream &s, lldb::DescriptionLevel level, + unsigned indentation) const override; }; protected: diff --git a/lldb/include/lldb/Core/IOHandler.h b/lldb/include/lldb/Core/IOHandler.h index 04b94da3a8c1e..5c1246751abc0 100644 --- a/lldb/include/lldb/Core/IOHandler.h +++ b/lldb/include/lldb/Core/IOHandler.h @@ -456,43 +456,6 @@ class IOHandlerConfirm : public IOHandlerDelegate, public IOHandlerEditline { bool m_user_response; }; -class IOHandlerCursesGUI : public IOHandler { -public: - IOHandlerCursesGUI(Debugger &debugger); - - ~IOHandlerCursesGUI() override; - - void Run() override; - - void Cancel() override; - - bool Interrupt() override; - - void GotEOF() override; - - void Activate() override; - - void Deactivate() override; - -protected: - curses::ApplicationAP m_app_ap; -}; - -class IOHandlerCursesValueObjectList : public IOHandler { -public: - IOHandlerCursesValueObjectList(Debugger &debugger, - ValueObjectList &valobj_list); - - ~IOHandlerCursesValueObjectList() override; - - void Run() override; - - void GotEOF() override; - -protected: - ValueObjectList m_valobj_list; -}; - class IOHandlerStack { public: IOHandlerStack() = default; diff --git 
a/lldb/include/lldb/Core/IOHandlerCursesGUI.h b/lldb/include/lldb/Core/IOHandlerCursesGUI.h new file mode 100644 index 0000000000000..afa4352697255 --- /dev/null +++ b/lldb/include/lldb/Core/IOHandlerCursesGUI.h @@ -0,0 +1,40 @@ +//===-- IOHandlerCursesGUI.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef liblldb_IOHandlerCursesGUI_h_ +#define liblldb_IOHandlerCursesGUI_h_ + +#include "lldb/Core/IOHandler.h" + +namespace lldb_private { + +class IOHandlerCursesGUI : public IOHandler { +public: + IOHandlerCursesGUI(Debugger &debugger); + + ~IOHandlerCursesGUI() override; + + void Run() override; + + void Cancel() override; + + bool Interrupt() override; + + void GotEOF() override; + + void Activate() override; + + void Deactivate() override; + +protected: + curses::ApplicationAP m_app_ap; +}; + +} // namespace lldb_private + +#endif // liblldb_IOHandlerCursesGUI_h_ diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h index bb6c9bdad760f..2af18c83f23a3 100644 --- a/lldb/include/lldb/Core/Module.h +++ b/lldb/include/lldb/Core/Module.h @@ -190,7 +190,7 @@ class Module : public std::enable_shared_from_this, lldb::ModuleSP CalculateSymbolContextModule() override; void - GetDescription(Stream *s, + GetDescription(llvm::raw_ostream &s, lldb::DescriptionLevel level = lldb::eDescriptionLevelFull); /// Get the module path and object name. diff --git a/lldb/include/lldb/Core/ModuleSpec.h b/lldb/include/lldb/Core/ModuleSpec.h index 651d0dc869bc2..6d024fe3434ba 100644 --- a/lldb/include/lldb/Core/ModuleSpec.h +++ b/lldb/include/lldb/Core/ModuleSpec.h @@ -207,7 +207,7 @@ class ModuleSpec { if (dumped_something) strm.PutCString(", "); strm.Printf("arch = "); - m_arch.DumpTriple(strm); + m_arch.DumpTriple(strm.AsRawOstream()); dumped_something = true; } if (m_uuid.IsValid()) { @@ -251,24 +251,18 @@ class ModuleSpec { if (match_module_spec.GetObjectName() && match_module_spec.GetObjectName() != GetObjectName()) return false; - if (match_module_spec.GetFileSpecPtr()) { - const FileSpec &fspec = match_module_spec.GetFileSpec(); - if (!FileSpec::Equal(fspec, GetFileSpec(), - !fspec.GetDirectory().IsEmpty())) - return false; - } - if (GetPlatformFileSpec() && match_module_spec.GetPlatformFileSpecPtr()) { - const FileSpec &fspec = match_module_spec.GetPlatformFileSpec(); - if (!FileSpec::Equal(fspec, GetPlatformFileSpec(), - !fspec.GetDirectory().IsEmpty())) - return false; + if (!FileSpec::Match(match_module_spec.GetFileSpec(), GetFileSpec())) + return false; + if (GetPlatformFileSpec() && + !FileSpec::Match(match_module_spec.GetPlatformFileSpec(), + GetPlatformFileSpec())) { + return false; } // Only match the symbol file spec if there is one in this ModuleSpec - if (GetSymbolFileSpec() && match_module_spec.GetSymbolFileSpecPtr()) { - const FileSpec &fspec = match_module_spec.GetSymbolFileSpec(); - if (!FileSpec::Equal(fspec, GetSymbolFileSpec(), - !fspec.GetDirectory().IsEmpty())) - return false; + if (GetSymbolFileSpec() && + !FileSpec::Match(match_module_spec.GetSymbolFileSpec(), + GetSymbolFileSpec())) { + return false; } if (match_module_spec.GetArchitecturePtr()) { if (exact_arch_match) { diff --git a/lldb/include/lldb/Core/STLUtils.h b/lldb/include/lldb/Core/STLUtils.h 
deleted file mode 100644 index f9500aa5594ed..0000000000000 --- a/lldb/include/lldb/Core/STLUtils.h +++ /dev/null @@ -1,26 +0,0 @@ -//===-- STLUtils.h ----------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef liblldb_STLUtils_h_ -#define liblldb_STLUtils_h_ - -#include - -#include -#include -#include - - -// C string less than compare function object -struct CStringCompareFunctionObject { - bool operator()(const char *s1, const char *s2) const { - return strcmp(s1, s2) < 0; - } -}; - -#endif // liblldb_STLUtils_h_ diff --git a/lldb/include/lldb/Core/SourceManager.h b/lldb/include/lldb/Core/SourceManager.h index bca817750d8da..f1f56d0886c3a 100644 --- a/lldb/include/lldb/Core/SourceManager.h +++ b/lldb/include/lldb/Core/SourceManager.h @@ -54,8 +54,6 @@ class SourceManager { bool LineIsValid(uint32_t line); - bool FileSpecMatches(const FileSpec &file_spec); - const FileSpec &GetFileSpec() { return m_file_spec; } uint32_t GetSourceMapModificationID() const { return m_source_map_mod_id; } diff --git a/lldb/include/lldb/Core/ThreadSafeDenseMap.h b/lldb/include/lldb/Core/ThreadSafeDenseMap.h index c485b91acb47a..420cb57635865 100644 --- a/lldb/include/lldb/Core/ThreadSafeDenseMap.h +++ b/lldb/include/lldb/Core/ThreadSafeDenseMap.h @@ -62,4 +62,4 @@ class ThreadSafeDenseMap { } // namespace lldb_private -#endif // liblldb_ThreadSafeSTLMap_h_ +#endif // liblldb_ThreadSafeDenseMap_h_ diff --git a/lldb/include/lldb/Core/ThreadSafeSTLMap.h b/lldb/include/lldb/Core/ThreadSafeSTLMap.h deleted file mode 100644 index df0208cd49b31..0000000000000 --- a/lldb/include/lldb/Core/ThreadSafeSTLMap.h +++ /dev/null @@ -1,128 +0,0 @@ -//===-- ThreadSafeSTLMap.h --------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef liblldb_ThreadSafeSTLMap_h_ -#define liblldb_ThreadSafeSTLMap_h_ - -#include -#include - -#include "lldb/lldb-defines.h" - -namespace lldb_private { - -template class ThreadSafeSTLMap { -public: - typedef std::map<_Key, _Tp> collection; - typedef typename collection::iterator iterator; - typedef typename collection::const_iterator const_iterator; - // Constructors and Destructors - ThreadSafeSTLMap() : m_collection(), m_mutex() {} - - ~ThreadSafeSTLMap() {} - - bool IsEmpty() const { - std::lock_guard guard(m_mutex); - return m_collection.empty(); - } - - void Clear() { - std::lock_guard guard(m_mutex); - return m_collection.clear(); - } - - size_t Erase(const _Key &key) { - std::lock_guard guard(m_mutex); - return EraseNoLock(key); - } - - size_t EraseNoLock(const _Key &key) { return m_collection.erase(key); } - - bool GetValueForKey(const _Key &key, _Tp &value) const { - std::lock_guard guard(m_mutex); - return GetValueForKeyNoLock(key, value); - } - - // Call this if you have already manually locked the mutex using the - // GetMutex() accessor - bool GetValueForKeyNoLock(const _Key &key, _Tp &value) const { - const_iterator pos = m_collection.find(key); - if (pos != m_collection.end()) { - value = pos->second; - return true; - } - return false; - } - - bool GetFirstKeyForValue(const _Tp &value, _Key &key) const { - std::lock_guard guard(m_mutex); - return GetFirstKeyForValueNoLock(value, key); - } - - bool GetFirstKeyForValueNoLock(const _Tp &value, _Key &key) const { - const_iterator pos, end = m_collection.end(); - for (pos = m_collection.begin(); pos != end; ++pos) { - if (pos->second == value) { - key = pos->first; - return true; - } - } - return false; - } - - bool LowerBound(const _Key &key, _Key &match_key, _Tp &match_value, - bool decrement_if_not_equal) const { - std::lock_guard guard(m_mutex); - return LowerBoundNoLock(key, match_key, match_value, - decrement_if_not_equal); - } - - bool LowerBoundNoLock(const _Key &key, _Key &match_key, _Tp &match_value, - bool decrement_if_not_equal) const { - const_iterator pos = m_collection.lower_bound(key); - if (pos != m_collection.end()) { - match_key = pos->first; - if (decrement_if_not_equal && key != match_key && - pos != m_collection.begin()) { - --pos; - match_key = pos->first; - } - match_value = pos->second; - return true; - } - return false; - } - - iterator lower_bound_unsafe(const _Key &key) { - return m_collection.lower_bound(key); - } - - void SetValueForKey(const _Key &key, const _Tp &value) { - std::lock_guard guard(m_mutex); - SetValueForKeyNoLock(key, value); - } - - // Call this if you have already manually locked the mutex using the - // GetMutex() accessor - void SetValueForKeyNoLock(const _Key &key, const _Tp &value) { - m_collection[key] = value; - } - - std::recursive_mutex &GetMutex() { return m_mutex; } - -private: - collection m_collection; - mutable std::recursive_mutex m_mutex; - - // For ThreadSafeSTLMap only - DISALLOW_COPY_AND_ASSIGN(ThreadSafeSTLMap); -}; - -} // namespace lldb_private - -#endif // liblldb_ThreadSafeSTLMap_h_ diff --git a/lldb/include/lldb/Core/ThreadSafeSTLVector.h b/lldb/include/lldb/Core/ThreadSafeSTLVector.h deleted file mode 100644 index e1666a69ef7ea..0000000000000 --- a/lldb/include/lldb/Core/ThreadSafeSTLVector.h +++ /dev/null @@ -1,72 +0,0 @@ -//===-- ThreadSafeSTLVector.h ------------------------------------*- C++ -//-*-===// 
-// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef liblldb_ThreadSafeSTLVector_h_ -#define liblldb_ThreadSafeSTLVector_h_ - -#include -#include - -#include "lldb/lldb-defines.h" - -namespace lldb_private { - -template class ThreadSafeSTLVector { -public: - typedef std::vector<_Object> collection; - typedef typename collection::iterator iterator; - typedef typename collection::const_iterator const_iterator; - // Constructors and Destructors - ThreadSafeSTLVector() : m_collection(), m_mutex() {} - - ~ThreadSafeSTLVector() = default; - - bool IsEmpty() const { - std::lock_guard guard(m_mutex); - return m_collection.empty(); - } - - void Clear() { - std::lock_guard guard(m_mutex); - return m_collection.clear(); - } - - size_t GetCount() { - std::lock_guard guard(m_mutex); - return m_collection.size(); - } - - void AppendObject(_Object &object) { - std::lock_guard guard(m_mutex); - m_collection.push_back(object); - } - - _Object GetObject(size_t index) { - std::lock_guard guard(m_mutex); - return m_collection.at(index); - } - - void SetObject(size_t index, const _Object &object) { - std::lock_guard guard(m_mutex); - m_collection.at(index) = object; - } - - std::recursive_mutex &GetMutex() { return m_mutex; } - -private: - collection m_collection; - mutable std::recursive_mutex m_mutex; - - // For ThreadSafeSTLVector only - DISALLOW_COPY_AND_ASSIGN(ThreadSafeSTLVector); -}; - -} // namespace lldb_private - -#endif // liblldb_ThreadSafeSTLVector_h_ diff --git a/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h b/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h index 3b14a3e9f3885..ec395095351d0 100644 --- a/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h +++ b/lldb/include/lldb/Core/ValueObjectSyntheticFilter.h @@ -9,8 +9,6 @@ #ifndef liblldb_ValueObjectSyntheticFilter_h_ #define liblldb_ValueObjectSyntheticFilter_h_ -#include "lldb/Core/ThreadSafeSTLMap.h" -#include "lldb/Core/ThreadSafeSTLVector.h" #include "lldb/Core/ValueObject.h" #include "lldb/Symbol/CompilerType.h" #include "lldb/Utility/ConstString.h" @@ -135,19 +133,24 @@ class ValueObjectSynthetic : public ValueObject { lldb::SyntheticChildrenSP m_synth_sp; std::unique_ptr m_synth_filter_up; - typedef ThreadSafeSTLMap ByIndexMap; - typedef ThreadSafeSTLMap NameToIndexMap; - typedef ThreadSafeSTLVector SyntheticChildrenCache; + typedef std::map ByIndexMap; + typedef std::map NameToIndexMap; + typedef std::vector SyntheticChildrenCache; typedef ByIndexMap::iterator ByIndexIterator; typedef NameToIndexMap::iterator NameToIndexIterator; + std::mutex m_child_mutex; + /// Guarded by m_child_mutex; ByIndexMap m_children_byindex; + /// Guarded by m_child_mutex; NameToIndexMap m_name_toindex; + /// Guarded by m_child_mutex; + SyntheticChildrenCache m_synthetic_children_cache; + uint32_t m_synthetic_children_count; // FIXME use the ValueObject's // ChildrenManager instead of a special // purpose solution - SyntheticChildrenCache m_synthetic_children_cache; ConstString m_parent_type_name; diff --git a/lldb/include/lldb/DataFormatters/FormatManager.h b/lldb/include/lldb/DataFormatters/FormatManager.h index afaafda47e761..66df8397dfee4 100644 --- a/lldb/include/lldb/DataFormatters/FormatManager.h +++ b/lldb/include/lldb/DataFormatters/FormatManager.h @@ -52,24 +52,15 @@ class FormatManager : 
public IFormatChangeListener { void EnableCategory(ConstString category_name, TypeCategoryMap::Position pos = TypeCategoryMap::Default) { - EnableCategory(category_name, pos, - std::initializer_list()); + EnableCategory(category_name, pos, {}); } void EnableCategory(ConstString category_name, TypeCategoryMap::Position pos, lldb::LanguageType lang) { - std::initializer_list langs = {lang}; - EnableCategory(category_name, pos, langs); - } - - void EnableCategory(ConstString category_name, - TypeCategoryMap::Position pos = TypeCategoryMap::Default, - std::initializer_list langs = {}) { TypeCategoryMap::ValueSP category_sp; if (m_categories_map.Get(category_name, category_sp) && category_sp) { m_categories_map.Enable(category_sp, pos); - for (const lldb::LanguageType lang : langs) - category_sp->AddLanguage(lang); + category_sp->AddLanguage(lang); } } diff --git a/lldb/include/lldb/DataFormatters/TypeCategory.h b/lldb/include/lldb/DataFormatters/TypeCategory.h index a5438226bbbb8..dc5edb6549407 100644 --- a/lldb/include/lldb/DataFormatters/TypeCategory.h +++ b/lldb/include/lldb/DataFormatters/TypeCategory.h @@ -214,8 +214,7 @@ class TypeCategoryImpl { ValidatorContainer::RegexMatchForEachCallback m_validator_regex; }; - TypeCategoryImpl(IFormatChangeListener *clist, ConstString name, - std::initializer_list langs = {}); + TypeCategoryImpl(IFormatChangeListener *clist, ConstString name); template void ForEach(const ForEachCallbacks &foreach) { GetTypeFormatsContainer()->ForEach(foreach.GetFormatExactCallback()); @@ -359,8 +358,6 @@ class TypeCategoryImpl { void AddLanguage(lldb::LanguageType lang); - bool HasLanguage(lldb::LanguageType lang); - std::string GetDescription(); bool AnyMatches(ConstString type_name, diff --git a/lldb/include/lldb/Host/Editline.h b/lldb/include/lldb/Host/Editline.h index 65bf15531bc46..0cb2c6c5b6a14 100644 --- a/lldb/include/lldb/Host/Editline.h +++ b/lldb/include/lldb/Host/Editline.h @@ -133,6 +133,15 @@ enum class CursorLocation { /// session BlockEnd }; + +/// Operation for the history. +enum class HistoryOperation { + Oldest, + Older, + Current, + Newer, + Newest +}; } using namespace line_editor; @@ -258,11 +267,7 @@ class Editline { StringList GetInputAsStringList(int line_count = UINT32_MAX); /// Replaces the current multi-line session with the next entry from history. - /// When the parameter is - /// true it will take the next earlier entry from history, when it is false it - /// takes the next most - /// recent. - unsigned char RecallHistory(bool earlier); + unsigned char RecallHistory(HistoryOperation op); /// Character reading implementation for EditLine that supports our multi-line /// editing trickery. 
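The Editline change above replaces RecallHistory(bool earlier) with an explicit HistoryOperation, which reads unambiguously at call sites and leaves room for jumping to either end of the history. A hedged sketch of the resulting dispatch over a toy history, not Editline's actual el_history-backed body:

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

enum class HistoryOperation { Oldest, Older, Current, Newer, Newest };

struct History {
  std::vector<std::string> entries;
  size_t index = 0;

  const std::string &recall(HistoryOperation op) {
    assert(!entries.empty());
    switch (op) {
    case HistoryOperation::Oldest:  index = 0; break;
    case HistoryOperation::Older:   if (index > 0) --index; break;
    case HistoryOperation::Current: break; // re-read the current entry
    case HistoryOperation::Newer:   if (index + 1 < entries.size()) ++index; break;
    case HistoryOperation::Newest:  index = entries.size() - 1; break;
    }
    return entries[index];
  }
};

// history.recall(HistoryOperation::Older) replaces RecallHistory(true).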
diff --git a/lldb/include/lldb/Interpreter/CommandReturnObject.h b/lldb/include/lldb/Interpreter/CommandReturnObject.h
index 61e57fb798a1d..8af76e07e5ae1 100644
--- a/lldb/include/lldb/Interpreter/CommandReturnObject.h
+++ b/lldb/include/lldb/Interpreter/CommandReturnObject.h
@@ -9,7 +9,6 @@
 #ifndef liblldb_CommandReturnObject_h_
 #define liblldb_CommandReturnObject_h_
 
-#include "lldb/Core/STLUtils.h"
 #include "lldb/Core/StreamFile.h"
 #include "lldb/Utility/StreamString.h"
 #include "lldb/Utility/StreamTee.h"
diff --git a/lldb/include/lldb/Symbol/ClangASTContext.h b/lldb/include/lldb/Symbol/ClangASTContext.h
index f4428c6821825..b2c284282f11e 100644
--- a/lldb/include/lldb/Symbol/ClangASTContext.h
+++ b/lldb/include/lldb/Symbol/ClangASTContext.h
@@ -41,15 +41,17 @@ namespace lldb_private {
 class Declaration;
 
 class ClangASTContext : public TypeSystem {
+  // LLVM RTTI support
+  static char ID;
+
 public:
   typedef void (*CompleteTagDeclCallback)(void *baton, clang::TagDecl *);
   typedef void (*CompleteObjCInterfaceDeclCallback)(void *baton,
                                                     clang::ObjCInterfaceDecl *);
 
   // llvm casting support
-  static bool classof(const TypeSystem *ts) {
-    return ts->getKind() == TypeSystem::eKindClang;
-  }
+  bool isA(const void *ClassID) const override { return ClassID == &ID; }
+  static bool classof(const TypeSystem *ts) { return ts->isA(&ID); }
 
   // Constructors and Destructors
   explicit ClangASTContext(llvm::StringRef triple = "");
@@ -148,13 +150,8 @@ class ClangASTContext : public TypeSystem {
   CompilerType GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding,
                                                    size_t bit_size) override;
 
-  static CompilerType GetBuiltinTypeForEncodingAndBitSize(
-      clang::ASTContext *ast, lldb::Encoding encoding, uint32_t bit_size);
-
   CompilerType GetBasicType(lldb::BasicType type);
 
-  CompilerType GetBasicType(ConstString name);
-
   static lldb::BasicType GetBasicTypeEnumeration(ConstString name);
 
   CompilerType GetBuiltinTypeForDWARFEncodingAndBitSize(const char *type_name,
@@ -906,7 +903,8 @@ class ClangASTContext : public TypeSystem {
 
   static clang::TypedefNameDecl *GetAsTypedefDecl(const CompilerType &type);
 
-  clang::CXXRecordDecl *GetAsCXXRecordDecl(lldb::opaque_compiler_type_t type);
+  static clang::CXXRecordDecl *
+  GetAsCXXRecordDecl(lldb::opaque_compiler_type_t type);
 
   static clang::ObjCInterfaceDecl *
   GetAsObjCInterfaceDecl(const CompilerType &type);
diff --git a/lldb/include/lldb/Symbol/CompileUnit.h b/lldb/include/lldb/Symbol/CompileUnit.h
index 7efbf792b1a92..aec5cc7c8743b 100644
--- a/lldb/include/lldb/Symbol/CompileUnit.h
+++ b/lldb/include/lldb/Symbol/CompileUnit.h
@@ -13,6 +13,7 @@
 #include "lldb/Core/ModuleChild.h"
 #include "lldb/Symbol/DebugMacros.h"
 #include "lldb/Symbol/Function.h"
+#include "lldb/Symbol/LineTable.h"
 #include "lldb/Symbol/SourceModule.h"
 #include "lldb/Utility/Stream.h"
 #include "lldb/Utility/UserID.h"
@@ -35,7 +36,6 @@ namespace lldb_private {
 /// table.
 class CompileUnit : public std::enable_shared_from_this<CompileUnit>,
                     public ModuleChild,
-                    public FileSpec,
                     public UserID,
                     public SymbolContextScope {
 public:
@@ -116,9 +116,6 @@ class CompileUnit : public std::enable_shared_from_this<CompileUnit>,
               const FileSpec &file_spec, lldb::user_id_t uid,
               lldb::LanguageType language, lldb_private::LazyBool is_optimized);
 
-  /// Destructor
-  ~CompileUnit() override;
-
   /// Add a function to this compile unit.
   ///
   /// Typically called by the SymbolFile plug-ins as they partially parse the
@@ -225,6 +222,9 @@ class CompileUnit : public std::enable_shared_from_this<CompileUnit>,
                          const FileSpec *file_spec_ptr, bool exact,
                          LineEntry *line_entry);
 
+  /// Return the primary source file associated with this compile unit.
+  const FileSpec &GetPrimaryFile() const { return m_file_spec; }
+
   /// Get the line table for the compile unit.
   ///
   /// Called by clients and the SymbolFile plug-in. The SymbolFile plug-ins
@@ -381,14 +381,11 @@ class CompileUnit : public std::enable_shared_from_this<CompileUnit>,
   ///     A SymbolContext list class that will get any matching
   ///     entries appended to.
   ///
-  /// \return
-  ///     The number of new matches that were added to \a sc_list.
-  ///
   /// \see enum SymbolContext::Scope
-  uint32_t ResolveSymbolContext(const FileSpec &file_spec, uint32_t line,
-                                bool check_inlines, bool exact,
-                                lldb::SymbolContextItem resolve_scope,
-                                SymbolContextList &sc_list);
+  void ResolveSymbolContext(const FileSpec &file_spec, uint32_t line,
+                            bool check_inlines, bool exact,
+                            lldb::SymbolContextItem resolve_scope,
+                            SymbolContextList &sc_list);
 
   /// Get whether compiler optimizations were enabled for this compile unit
   ///
@@ -418,6 +415,8 @@ class CompileUnit : public std::enable_shared_from_this<CompileUnit>,
   /// All modules, including the current module, imported by this
   /// compile unit.
   std::vector<SourceModule> m_imported_modules;
+  /// The primary file associated with this compile unit.
+  FileSpec m_file_spec;
   /// Files associated with this compile unit's line table and
   /// declarations.
   FileSpecList m_support_files;
diff --git a/lldb/include/lldb/Symbol/CompilerDecl.h b/lldb/include/lldb/Symbol/CompilerDecl.h
index 4817ec4b22670..e4687ffb38536 100644
--- a/lldb/include/lldb/Symbol/CompilerDecl.h
+++ b/lldb/include/lldb/Symbol/CompilerDecl.h
@@ -18,13 +18,11 @@ namespace lldb_private {
 class CompilerDecl {
 public:
   // Constructors and Destructors
-  CompilerDecl() : m_type_system(nullptr), m_opaque_decl(nullptr) {}
+  CompilerDecl() = default;
 
   CompilerDecl(TypeSystem *type_system, void *decl)
       : m_type_system(type_system), m_opaque_decl(decl) {}
 
-  ~CompilerDecl() {}
-
   // Tests
 
   explicit operator bool() const { return IsValid(); }
@@ -39,8 +37,6 @@ class CompilerDecl {
     return m_type_system != nullptr && m_opaque_decl != nullptr;
   }
 
-  bool IsClang() const;
-
   // Accessors
 
   TypeSystem *GetTypeSystem() const { return m_type_system; }
@@ -75,8 +71,8 @@ class CompilerDecl {
   CompilerType GetFunctionArgumentType(size_t arg_idx) const;
 
 private:
-  TypeSystem *m_type_system;
-  void *m_opaque_decl;
+  TypeSystem *m_type_system = nullptr;
+  void *m_opaque_decl = nullptr;
 };
 
 bool operator==(const CompilerDecl &lhs, const CompilerDecl &rhs);
diff --git a/lldb/include/lldb/Symbol/CompilerDeclContext.h b/lldb/include/lldb/Symbol/CompilerDeclContext.h
index e7958c08d8334..fe8539ab30e68 100644
--- a/lldb/include/lldb/Symbol/CompilerDeclContext.h
+++ b/lldb/include/lldb/Symbol/CompilerDeclContext.h
@@ -19,13 +19,11 @@ namespace lldb_private {
 class CompilerDeclContext {
 public:
   // Constructors and Destructors
-  CompilerDeclContext() : m_type_system(nullptr), m_opaque_decl_ctx(nullptr) {}
+  CompilerDeclContext() = default;
 
   CompilerDeclContext(TypeSystem *type_system, void *decl_ctx)
       : m_type_system(type_system), m_opaque_decl_ctx(decl_ctx) {}
 
-  ~CompilerDeclContext() {}
-
   // Tests
 
   explicit operator bool() const { return IsValid(); }
@@ -40,8 +38,6 @@ class CompilerDeclContext {
     return m_type_system != nullptr && m_opaque_decl_ctx != nullptr;
   }
 
-  bool IsClang() const;
-
   std::vector<CompilerDecl> FindDeclByName(ConstString name,
                                            const bool ignore_using_decls);
 
@@ -105,8 +101,8 @@ class CompilerDeclContext {
   bool IsStructUnionOrClass() const;
 
 private:
-  TypeSystem *m_type_system;
-  void *m_opaque_decl_ctx;
+  TypeSystem *m_type_system = nullptr;
+  void *m_opaque_decl_ctx = nullptr;
 };
 
 bool operator==(const CompilerDeclContext &lhs, const CompilerDeclContext &rhs);
diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h
index cedd2523a5a89..91d9c5e48d20c 100644
--- a/lldb/include/lldb/Symbol/CompilerType.h
+++ b/lldb/include/lldb/Symbol/CompilerType.h
@@ -357,14 +357,6 @@ class CompilerType {
   bool GetValueAsScalar(const DataExtractor &data, lldb::offset_t data_offset,
                         size_t data_byte_size, Scalar &value) const;
 
-  bool SetValueFromScalar(const Scalar &value, Stream &strm);
-
-  bool ReadFromMemory(ExecutionContext *exe_ctx, lldb::addr_t addr,
-                      AddressType address_type, DataExtractor &data);
-
-  bool WriteToMemory(ExecutionContext *exe_ctx, lldb::addr_t addr,
-                     AddressType address_type, StreamString &new_value);
-
   void Clear() {
     m_type = nullptr;
     m_type_system = nullptr;
diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h
index 6283d67baba52..ea860647fdb1c 100644
--- a/lldb/include/lldb/Symbol/TypeSystem.h
+++ b/lldb/include/lldb/Symbol/TypeSystem.h
@@ -52,47 +52,11 @@ struct LanguageSet {
 /// Interface for representing the Type Systems in different languages.
 class TypeSystem : public PluginInterface {
 public:
-  // Intrusive type system that allows us to use llvm casting.
-  //
-  // To add a new type system:
-  //
-  // 1 - Add a new enumeration for llvm casting below for your TypeSystem
-  // subclass, here we will use eKindFoo
-  //
-  // 2 - Your TypeSystem subclass will inherit from TypeSystem and needs
-  // to implement a static classof() function that returns your
-  // enumeration:
-  //
-  //  class Foo : public lldb_private::TypeSystem
-  //  {
-  //    static bool classof(const TypeSystem *ts)
-  //    {
-  //      return ts->getKind() == TypeSystem::eKindFoo;
-  //    }
-  //  };
-  //
-  // 3 - Contruct your TypeSystem subclass with the enumeration from below
-  //
-  //  Foo() :
-  //      TypeSystem(TypeSystem::eKindFoo),
-  //      ...
-  //  {
-  //  }
-  //
-  // Then you can use the llvm casting on any "TypeSystem *" to get an instance
-  // of your subclass.
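Aside: what replaces the enumeration scheme being deleted here is LLVM-style RTTI keyed on the address of a per-class static, exactly as the ClangASTContext hunk above wires it up. Reduced to a self-contained sketch, with FooTypeSystem as a hypothetical subclass:

class TypeSystem {
public:
  virtual ~TypeSystem() = default;
  // Each subclass reports whether ClassID identifies its own class.
  virtual bool isA(const void *ClassID) const = 0;
};

class FooTypeSystem : public TypeSystem {
  // Only the address of ID matters; its value is never read.
  static char ID;

public:
  bool isA(const void *ClassID) const override { return ClassID == &ID; }
  static bool classof(const TypeSystem *ts) { return ts->isA(&ID); }
};
char FooTypeSystem::ID;

With classof in place, llvm::isa<FooTypeSystem> and llvm::dyn_cast<FooTypeSystem> work without any central kind enumeration, so out-of-tree type systems no longer need a slot in this header.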
-  enum LLVMCastKind {
-    eKindClang,
-    eKindSwift,
-    kNumKinds
-  };
-
   // Constructors and Destructors
-  TypeSystem(LLVMCastKind kind);
-
   ~TypeSystem() override;
 
-  LLVMCastKind getKind() const { return m_kind; }
+  // LLVM RTTI support
+  virtual bool isA(const void *ClassID) const = 0;
 
   static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language,
                                            Module *module);
@@ -493,8 +457,7 @@ class TypeSystem : public PluginInterface {
   virtual bool IsMeaninglessWithoutDynamicResolution(void *type);
 
 protected:
-  const LLVMCastKind m_kind; // Support for llvm casting
-  SymbolFile *m_sym_file;
+  SymbolFile *m_sym_file = nullptr;
 };
 
 class TypeSystemMap {
diff --git a/lldb/include/lldb/Target/ABI.h b/lldb/include/lldb/Target/ABI.h
index 93378abc2ac2c..1aff1e2f78174 100644
--- a/lldb/include/lldb/Target/ABI.h
+++ b/lldb/include/lldb/Target/ABI.h
@@ -126,12 +126,7 @@ class ABI : public PluginInterface {
 
   llvm::MCRegisterInfo &GetMCRegisterInfo() { return *m_mc_register_info_up; }
 
-  virtual const RegisterInfo *GetRegisterInfoArray(uint32_t &count) = 0;
-
-  bool GetRegisterInfoByName(ConstString name, RegisterInfo &info);
-
-  bool GetRegisterInfoByKind(lldb::RegisterKind reg_kind, uint32_t reg_num,
-                             RegisterInfo &info);
+  virtual void AugmentRegisterInfo(RegisterInfo &info);
 
   virtual bool GetPointerReturnRegister(const char *&name) { return false; }
 
@@ -143,6 +138,10 @@ class ABI : public PluginInterface {
     assert(m_mc_register_info_up && "ABI must have MCRegisterInfo");
   }
 
+  bool GetRegisterInfoByName(ConstString name, RegisterInfo &info);
+
+  virtual const RegisterInfo *GetRegisterInfoArray(uint32_t &count) = 0;
+
   /// Utility function to construct a MCRegisterInfo using the ArchSpec triple.
   /// Plugins wishing to customize the construction can construct the
   /// MCRegisterInfo themselves.
diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index 81181a831a492..47c5c78704052 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -2185,11 +2185,9 @@ class Process : public std::enable_shared_from_this<Process>,
 
   OperatingSystem *GetOperatingSystem() { return m_os_up.get(); }
 
-  std::vector<LanguageRuntime *>
-  GetLanguageRuntimes(bool retry_if_null = true);
+  std::vector<LanguageRuntime *> GetLanguageRuntimes();
 
-  LanguageRuntime *GetLanguageRuntime(lldb::LanguageType language,
-                                      bool retry_if_null = true);
+  LanguageRuntime *GetLanguageRuntime(lldb::LanguageType language);
 
   bool IsPossibleDynamicValue(ValueObject &in_value);
 
diff --git a/lldb/include/lldb/Utility/ArchSpec.h b/lldb/include/lldb/Utility/ArchSpec.h
index ae79583768320..15e2fdb10c324 100644
--- a/lldb/include/lldb/Utility/ArchSpec.h
+++ b/lldb/include/lldb/Utility/ArchSpec.h
@@ -433,7 +433,7 @@ class ArchSpec {
   /// \return A triple describing this ArchSpec.
   const llvm::Triple &GetTriple() const { return m_triple; }
 
-  void DumpTriple(Stream &s) const;
+  void DumpTriple(llvm::raw_ostream &s) const;
 
   /// Architecture triple setter.
   ///
diff --git a/lldb/include/lldb/Utility/Baton.h b/lldb/include/lldb/Utility/Baton.h
index 4050f2af2bf04..c42867489c65d 100644
--- a/lldb/include/lldb/Utility/Baton.h
+++ b/lldb/include/lldb/Utility/Baton.h
@@ -12,6 +12,8 @@
 #include "lldb/lldb-enumerations.h"
 #include "lldb/lldb-public.h"
 
+#include "llvm/Support/raw_ostream.h"
+
 #include <memory>
 
 namespace lldb_private {
@@ -37,8 +39,9 @@ class Baton {
 
   virtual void *data() = 0;
 
-  virtual void GetDescription(Stream *s,
-                              lldb::DescriptionLevel level) const = 0;
+  virtual void GetDescription(llvm::raw_ostream &s,
+                              lldb::DescriptionLevel level,
+                              unsigned indentation) const = 0;
 };
 
 class UntypedBaton : public Baton {
@@ -50,7 +53,8 @@ class UntypedBaton : public Baton {
   }
 
   void *data() override { return m_data; }
-  void GetDescription(Stream *s, lldb::DescriptionLevel level) const override;
+  void GetDescription(llvm::raw_ostream &s, lldb::DescriptionLevel level,
+                      unsigned indentation) const override;
 
   void *m_data; // Leave baton public for easy access
 };
@@ -63,7 +67,8 @@ template <class T> class TypedBaton : public Baton {
   const T *getItem() const { return Item.get(); }
 
   void *data() override { return Item.get(); }
-  void GetDescription(Stream *s, lldb::DescriptionLevel level) const override {}
+  void GetDescription(llvm::raw_ostream &s, lldb::DescriptionLevel level,
+                      unsigned indentation) const override {}
 
 protected:
   std::unique_ptr<T> Item;
diff --git a/lldb/include/lldb/Utility/FileSpec.h b/lldb/include/lldb/Utility/FileSpec.h
index 53b0a9c08699c..61b6209bb3c02 100644
--- a/lldb/include/lldb/Utility/FileSpec.h
+++ b/lldb/include/lldb/Utility/FileSpec.h
@@ -75,18 +75,6 @@ class FileSpec {
 
   explicit FileSpec(llvm::StringRef path, const llvm::Triple &triple);
 
-  /// Copy constructor
-  ///
-  /// Makes a copy of the uniqued directory and filename strings from \a rhs
-  /// if it is not nullptr.
-  ///
-  /// \param[in] rhs
-  ///     A const FileSpec object pointer to copy if non-nullptr.
-  FileSpec(const FileSpec *rhs);
-
-  /// Destructor.
-  ~FileSpec();
-
   bool DirectoryEquals(const FileSpec &other) const;
 
   bool FileEquals(const FileSpec &other) const;
@@ -195,6 +183,12 @@ class FileSpec {
 
   static bool Equal(const FileSpec &a, const FileSpec &b, bool full);
 
+  /// Match FileSpec \a pattern against FileSpec \a file. If \a pattern has a
+  /// directory component, then the \a file must have the same directory
+  /// component. Otherwise, it matches just the filename. An empty \a
+  /// pattern matches everything.
+  static bool Match(const FileSpec &pattern, const FileSpec &file);
+
   /// Attempt to guess path style for a given path string. It returns a style,
   /// if it was able to make a reasonable guess, or None if it wasn't. The guess
   /// will be correct if the input path was a valid absolute path on the system
diff --git a/lldb/include/lldb/Utility/Stream.h b/lldb/include/lldb/Utility/Stream.h
index 414f921773030..a3a33178086e7 100644
--- a/lldb/include/lldb/Utility/Stream.h
+++ b/lldb/include/lldb/Utility/Stream.h
@@ -213,85 +213,14 @@ class Stream {
   ///     in one statement.
   Stream &operator<<(char ch);
 
-  /// Output a uint8_t \a uval to the stream \a s.
-  ///
-  /// \param[in] uval
-  ///     A uint8_t value.
-  ///
-  /// \return
-  ///     A reference to this class so multiple things can be streamed
-  ///     in one statement.
-  Stream &operator<<(uint8_t uval);
-
-  /// Output a uint16_t \a uval to the stream \a s.
-  ///
-  /// \param[in] uval
-  ///     A uint16_t value.
-  ///
-  /// \return
-  ///     A reference to this class so multiple things can be streamed
-  ///     in one statement.
- Stream &operator<<(uint16_t uval); - - /// Output a uint32_t \a uval to the stream \a s. - /// - /// \param[in] uval - /// A uint32_t value. - /// - /// \return - /// A reference to this class so multiple things can be streamed - /// in one statement. - Stream &operator<<(uint32_t uval); - - /// Output a uint64_t \a uval to the stream \a s. - /// - /// \param[in] uval - /// A uint64_t value. - /// - /// \return - /// A reference to this class so multiple things can be streamed - /// in one statement. - Stream &operator<<(uint64_t uval); - - /// Output a int8_t \a sval to the stream \a s. - /// - /// \param[in] sval - /// A int8_t value. - /// - /// \return - /// A reference to this class so multiple things can be streamed - /// in one statement. - Stream &operator<<(int8_t sval); - - /// Output a int16_t \a sval to the stream \a s. - /// - /// \param[in] sval - /// A int16_t value. - /// - /// \return - /// A reference to this class so multiple things can be streamed - /// in one statement. - Stream &operator<<(int16_t sval); - - /// Output a int32_t \a sval to the stream \a s. - /// - /// \param[in] sval - /// A int32_t value. - /// - /// \return - /// A reference to this class so multiple things can be streamed - /// in one statement. - Stream &operator<<(int32_t sval); - - /// Output a int64_t \a sval to the stream \a s. - /// - /// \param[in] sval - /// A int64_t value. - /// - /// \return - /// A reference to this class so multiple things can be streamed - /// in one statement. - Stream &operator<<(int64_t sval); + Stream &operator<<(uint8_t uval) = delete; + Stream &operator<<(uint16_t uval) = delete; + Stream &operator<<(uint32_t uval) = delete; + Stream &operator<<(uint64_t uval) = delete; + Stream &operator<<(int8_t sval) = delete; + Stream &operator<<(int16_t sval) = delete; + Stream &operator<<(int32_t sval) = delete; + Stream &operator<<(int64_t sval) = delete; /// Output an address value to this stream. /// @@ -373,8 +302,8 @@ class Stream { /// Get the current indentation level. /// /// \return - /// The current indentation level as an integer. - int GetIndentLevel() const; + /// The current indentation level. + unsigned GetIndentLevel() const; /// Indent the current line in the stream. /// @@ -388,10 +317,10 @@ class Stream { size_t Indent(llvm::StringRef s); /// Decrement the current indentation level. - void IndentLess(int amount = 2); + void IndentLess(unsigned amount = 2); /// Increment the current indentation level. - void IndentMore(int amount = 2); + void IndentMore(unsigned amount = 2); /// Output an offset value. /// @@ -446,7 +375,7 @@ class Stream { /// /// \param[in] level /// The new indentation level. - void SetIndentLevel(int level); + void SetIndentLevel(unsigned level); /// Output a SLEB128 number to the stream. /// @@ -477,7 +406,7 @@ class Stream { uint32_t m_addr_size; ///< Size of an address in bytes. lldb::ByteOrder m_byte_order; ///< Byte order to use when encoding scalar types. - int m_indent_level; ///< Indention level. + unsigned m_indent_level; ///< Indention level. std::size_t m_bytes_written = 0; ///< Number of bytes written so far. 
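Aside: with the integer operator<< overloads deleted above, streaming an integer into a Stream no longer compiles, so every call site has to pick a representation explicitly instead of inheriting one silently. Illustrative only, using members this header already declares:

// Given lldb_private::Stream s and uint64_t uval, after this change:
//   s << uval;                  // ill-formed now: the overload is deleted
//   s.Printf("%" PRIu64, uval); // explicit decimal formatting
//   s.PutHex64(uval);           // explicit hex encoding
// The point is that the caller, not an implicit conversion, now chooses
// how the integer is rendered.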
void _PutHex8(uint8_t uvalue, bool add_prefix); diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 3c80bcffec20e..0a92365544f99 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -690,6 +690,7 @@ enum SectionType { eSectionTypeDWARFDebugStrDwo, eSectionTypeDWARFDebugStrOffsetsDwo, eSectionTypeDWARFDebugTypesDwo, + eSectionTypeDWARFDebugRngListsDwo, }; FLAGS_ENUM(EmulateInstructionOptions){ diff --git a/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/TestBreakpointCommandList.py b/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/TestBreakpointCommandList.py new file mode 100644 index 0000000000000..f1a8656a73b55 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/TestBreakpointCommandList.py @@ -0,0 +1,44 @@ +""" +Test 'breakpoint command list'. +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @no_debug_info_test + def test_list_commands(self): + src_dir = self.getSourceDir() + yaml_path = os.path.join(src_dir, "a.yaml") + yaml_base, ext = os.path.splitext(yaml_path) + obj_path = self.getBuildArtifact("main.o") + self.yaml2obj(yaml_path, obj_path) + + # Create a target with the object file we just created from YAML + target = self.dbg.CreateTarget(obj_path) + self.assertTrue(target, VALID_TARGET) + + # Test without any breakpoints. + self.expect("breakpoint command list 1", error=True, substrs=["error: No breakpoints exist for which to list commands"]) + + # Set a breakpoint + self.runCmd("b foo") + + # Check list breakpoint commands for breakpoints that have no commands. + self.expect("breakpoint command list 1", startstr="Breakpoint 1 does not have an associated command.") + + # Add a breakpoint command. + self.runCmd("breakpoint command add -o 'source list' 1") + + # List breakpoint command that we just created. + self.expect("breakpoint command list 1", startstr="""Breakpoint 1: + Breakpoint commands: + source list +""") + + # List breakpoint command with invalid breakpoint ID. + self.expect("breakpoint command list 2", error=True, startstr="error: '2' is not a currently valid breakpoint ID.") diff --git a/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/a.yaml b/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/a.yaml new file mode 100644 index 0000000000000..1007f60c19ee3 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/a.yaml @@ -0,0 +1,18 @@ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x0000000000000010 + Content: 554889E5897DFC5DC3 +Symbols: + - Name: foo + Type: STT_FUNC + Section: .text + Size: 0x0000000000000009 +... diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/multiline-navigation/TestMultilineNavigation.py b/lldb/packages/Python/lldbsuite/test/commands/expression/multiline-navigation/TestMultilineNavigation.py new file mode 100644 index 0000000000000..712111209215d --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/multiline-navigation/TestMultilineNavigation.py @@ -0,0 +1,67 @@ +""" +Tests navigating in the multiline expression editor. 
+""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test.lldbpexpect import PExpectTest + +class TestCase(PExpectTest): + + mydir = TestBase.compute_mydir(__file__) + + arrow_up = "\033[A" + arrow_down = "\033[B" + + # PExpect uses many timeouts internally and doesn't play well + # under ASAN on a loaded machine.. + @skipIfAsan + def test_nav_arrow_up(self): + """Tests that we can navigate back to the previous line with the up arrow""" + self.launch() + + # Start multiline expression mode by just running 'expr' + self.child.sendline("expr") + self.child.expect_exact("terminate with an empty line to evaluate") + # Create a simple integer expression '123' and press enter. + self.child.send("123\n") + # We should see the prompt for the second line of our expression. + self.child.expect_exact("2: ") + # Go back to the first line and change 123 to 124. + # Then press enter twice to evaluate our expression. + self.child.send(self.arrow_up + "\b4\n\n") + # The result of our expression should be 124 (our edited expression) + # and not 123 (the one we initially typed). + self.child.expect_exact("(int) $0 = 124") + + self.quit() + + @skipIfAsan + def test_nav_arrow_down(self): + """Tests that we can navigate to the next line with the down arrow""" + self.launch() + + # Start multiline expression mode by just running 'expr' + self.child.sendline("expr") + self.child.expect_exact("terminate with an empty line to evaluate") + # Create a simple integer expression '111' and press enter. + self.child.send("111\n") + # We should see the prompt for the second line of our expression. + self.child.expect_exact("2: ") + # Create another simple integer expression '222'. + self.child.send("222") + # Go back to the first line and change '111' to '111+' to make + # an addition operation that spans two lines. We need to go up to + # test that we can go back down again. + self.child.send(self.arrow_up + "+") + # Go back down to our second line and change '222' to '223' + # so that the full expression is now '111+\n223'. + # Then press enter twice to evaluate the expression. + self.child.send(self.arrow_down + "\b3\n\n") + # The result of our expression '111 + 223' should be '334'. + # If the expression is '333' then arrow down failed to get + # us back to the second line. + self.child.expect_exact("(int) $0 = 334") + + self.quit() diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/TestStaticInitializers.py b/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/TestStaticInitializers.py index e350e6ef930f9..61107077f9cff 100644 --- a/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/TestStaticInitializers.py +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/TestStaticInitializers.py @@ -7,6 +7,8 @@ class StaticInitializers(TestBase): mydir = TestBase.compute_mydir(__file__) + @expectedFailureAll(archs="aarch64", oslist="linux", + bugnumber="https://bugs.llvm.org/show_bug.cgi?id=44053") def test(self): """ Test a static initializer. 
""" self.build() diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/breakpoint_names/TestBreakpointNames.py b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/breakpoint_names/TestBreakpointNames.py index 4a5ed87e330ff..9513278ba084d 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/breakpoint_names/TestBreakpointNames.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/breakpoint_names/TestBreakpointNames.py @@ -155,8 +155,13 @@ def do_check_illegal_names(self): def do_check_using_names(self): """Use Python APIs to check names work in place of breakpoint ID's.""" + # Create a dummy breakpoint to use up ID 1 + _ = self.target.BreakpointCreateByLocation(self.main_file_spec, 30) + + # Create a breakpiont to test with bkpt = self.target.BreakpointCreateByLocation(self.main_file_spec, 10) bkpt_name = "ABreakpoint" + bkpt_id = bkpt.GetID() other_bkpt_name= "_AnotherBreakpoint" # Add a name and make sure we match it: @@ -169,6 +174,7 @@ def do_check_using_names(self): self.assertTrue(bkpts.GetSize() == 1, "One breakpoint matched.") found_bkpt = bkpts.GetBreakpointAtIndex(0) self.assertTrue(bkpt.GetID() == found_bkpt.GetID(),"The right breakpoint.") + self.assertTrue(bkpt.GetID() == bkpt_id,"With the same ID as before.") retval = lldb.SBCommandReturnObject() self.dbg.GetCommandInterpreter().HandleCommand("break disable %s"%(bkpt_name), retval) diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/require_hw_breakpoints/TestRequireHWBreakpoints.py b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/require_hw_breakpoints/TestRequireHWBreakpoints.py index 020974ee469a8..4a571787f0118 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/require_hw_breakpoints/TestRequireHWBreakpoints.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/require_hw_breakpoints/TestRequireHWBreakpoints.py @@ -26,6 +26,8 @@ def test_breakpoint(self): self.assertTrue(breakpoint.IsHardware()) @skipIfWindows + @expectedFailureAll(archs="aarch64", oslist="linux", + bugnumber="https://bugs.llvm.org/show_bug.cgi?id=44055") def test_step_range(self): """Test stepping when hardware breakpoints are required.""" self.build() @@ -47,6 +49,8 @@ def test_step_range(self): in error.GetCString()) @skipIfWindows + @expectedFailureAll(archs="aarch64", oslist="linux", + bugnumber="https://bugs.llvm.org/show_bug.cgi?id=44055") def test_step_out(self): """Test stepping out when hardware breakpoints are required.""" self.build() @@ -67,6 +71,8 @@ def test_step_out(self): in error.GetCString()) @skipIfWindows + @expectedFailureAll(archs="aarch64", oslist="linux", + bugnumber="https://bugs.llvm.org/show_bug.cgi?id=44055") def test_step_over(self): """Test stepping over when hardware breakpoints are required.""" self.build() @@ -85,6 +91,8 @@ def test_step_over(self): ]) @skipIfWindows + @expectedFailureAll(archs="aarch64", oslist="linux", + bugnumber="https://bugs.llvm.org/show_bug.cgi?id=44055") def test_step_until(self): """Test stepping until when hardware breakpoints are required.""" self.build() diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/scripted_bkpt/TestScriptedResolver.py b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/scripted_bkpt/TestScriptedResolver.py index 4842bc0945519..817d7de6bb960 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/scripted_bkpt/TestScriptedResolver.py +++ 
b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/scripted_bkpt/TestScriptedResolver.py @@ -33,8 +33,7 @@ def test_search_depths(self): @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24528") def test_command_line(self): - """ Make sure we are called at the right depths depending on what we return - from __get_depth__""" + """ Test setting a resolver breakpoint from the command line """ self.build() self.do_test_cli() @@ -202,6 +201,23 @@ def do_test_cli(self): lldbutil.run_break_set_by_script(self, "resolver.Resolver", extra_options="-k symbol -v break_on_me") + # Make sure setting a resolver breakpoint doesn't pollute further breakpoint setting + # by checking the description of a regular file & line breakpoint to make sure it + # doesn't mention the Python Resolver function: + bkpt_no = lldbutil.run_break_set_by_file_and_line(self, "main.c", 12) + bkpt = target.FindBreakpointByID(bkpt_no) + strm = lldb.SBStream() + bkpt.GetDescription(strm, False) + used_resolver = "I am a python breakpoint resolver" in strm.GetData() + self.assertFalse(used_resolver, "Found the resolver description in the file & line breakpoint description.") + + # Also make sure the breakpoint was where we expected: + bp_loc = bkpt.GetLocationAtIndex(0) + bp_sc = bp_loc.GetAddress().GetSymbolContext(lldb.eSymbolContextEverything) + bp_se = bp_sc.GetLineEntry() + self.assertEqual(bp_se.GetLine(), 12, "Got the right line number") + self.assertEqual(bp_se.GetFileSpec().GetFilename(), "main.c", "Got the right filename") + def do_test_bad_options(self): target = self.make_target_and_import() diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestTargetXMLArch.py b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestTargetXMLArch.py index 9ea7cc8a4c7ea..20e575ae978b0 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestTargetXMLArch.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestTargetXMLArch.py @@ -4,6 +4,101 @@ from lldbsuite.test.decorators import * from gdbclientutils import * +class MyResponder(MockGDBServerResponder): + def qXferRead(self, obj, annex, offset, length): + if annex == "target.xml": + return """ + + i386:x86-64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + """, False + else: + return None, False + + def qC(self): + return "QC1" + + def haltReason(self): + return "T05thread:00000001;06:9038d60f00700000;07:98b4062680ffffff;10:c0d7bf1b80ffffff;" + + def readRegister(self, register): + regs = {0x0: "00b0060000610000", + 0xa: "68fe471c80ffffff", + 0xc: "60574a1c80ffffff", + 0xd: "18f3042680ffffff", + 0xe: "be8a4d7142000000", + 0xf: "50df471c80ffffff", + 0x10: "c0d7bf1b80ffffff" } + if register in regs: + return regs[register] + else: + return "0000000000000000" + class TestTargetXMLArch(GDBRemoteTestBase): @skipIfXmlSupportMissing @@ -14,102 +109,6 @@ def test(self): Test lldb's parsing of the tag in the target.xml register description packet. 
""" - class MyResponder(MockGDBServerResponder): - - def qXferRead(self, obj, annex, offset, length): - if annex == "target.xml": - return """ - - i386:x86-64 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - """, False - else: - return None, False - - def qC(self): - return "QC1" - - def haltReason(self): - return "T05thread:00000001;06:9038d60f00700000;07:98b4062680ffffff;10:c0d7bf1b80ffffff;" - - def readRegister(self, register): - regs = {0x0: "00b0060000610000", - 0xa: "68fe471c80ffffff", - 0xc: "60574a1c80ffffff", - 0xd: "18f3042680ffffff", - 0xe: "be8a4d7142000000", - 0xf: "50df471c80ffffff", - 0x10: "c0d7bf1b80ffffff" } - if register in regs: - return regs[register] - else: - return "0000000000000000" - self.server.responder = MyResponder() interp = self.dbg.GetCommandInterpreter() result = lldb.SBCommandReturnObject() @@ -125,3 +124,22 @@ def readRegister(self, register): interp.HandleCommand("target list", result) print(result.GetOutput()) self.assertTrue(target.GetTriple().startswith('x86_64-unknown-unknown')) + + @skipIfXmlSupportMissing + @skipIfRemote + def test_register_augmentation(self): + """ + Test that we correctly associate the register info with the eh_frame + register numbers. + """ + + target = self.createTarget("basic_eh_frame.yaml") + self.server.responder = MyResponder() + + process = self.connect(target) + lldbutil.expect_state_changes(self, self.dbg.GetListener(), process, + [lldb.eStateStopped]) + self.filecheck("image show-unwind -n foo", __file__, + "--check-prefix=UNWIND") +# UNWIND: eh_frame UnwindPlan: +# UNWIND: row[0]: 0: CFA=rsp+128 => rip=[CFA-8] diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/basic_eh_frame.yaml b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/basic_eh_frame.yaml new file mode 100644 index 0000000000000..384b9b992b407 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/basic_eh_frame.yaml @@ -0,0 +1,48 @@ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 + Entry: 0x0000000000401000 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x0000000000401000 + AddressAlign: 0x0000000000000001 + Content: C3 + - Name: .eh_frame + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x0000000000402000 + AddressAlign: 0x0000000000000008 + Content: 1800000000000000017A5200017810011B0C070890010E80010000001000000020000000DCEFFFFF0100000000000000 +Symbols: + - Name: .text + Type: STT_SECTION + Section: .text + Value: 0x0000000000401000 + - Name: .eh_frame + Type: STT_SECTION + Section: .eh_frame + Value: 0x0000000000402000 + - Name: _start + Binding: STB_GLOBAL + - Name: __bss_start + Section: .eh_frame + Binding: STB_GLOBAL + Value: 0x0000000000404000 + - Name: foo + Section: .text + Binding: STB_GLOBAL + Value: 0x0000000000401000 + - Name: _edata + Section: .eh_frame + Binding: STB_GLOBAL + Value: 0x0000000000404000 + - Name: _end + Section: .eh_frame + Binding: STB_GLOBAL + Value: 0x0000000000404000 +... 
diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/inline-stepping/TestInlineStepping.py b/lldb/packages/Python/lldbsuite/test/functionalities/inline-stepping/TestInlineStepping.py index a52cd4dd68653..ce4572361d931 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/inline-stepping/TestInlineStepping.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/inline-stepping/TestInlineStepping.py @@ -18,6 +18,8 @@ class TestInlineStepping(TestBase): compiler="icc", bugnumber="# Not really a bug. ICC combines two inlined functions.") @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr32343") + @expectedFailureAll(archs=["aarch64"], oslist=["linux"], + bugnumber="llvm.org/pr44057") def test_with_python_api(self): """Test stepping over and into inlined functions.""" self.build() diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/load_unload/TestLoadUnload.py b/lldb/packages/Python/lldbsuite/test/functionalities/load_unload/TestLoadUnload.py index 02e9198e38dff..ae0934c746162 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/load_unload/TestLoadUnload.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/load_unload/TestLoadUnload.py @@ -363,12 +363,16 @@ def run_load_unload(self): @skipIfFreeBSD # llvm.org/pr14424 - missing FreeBSD Makefiles/testcase support @skipIfWindows # Windows doesn't have dlopen and friends, dynamic libraries work differently + @expectedFailureAll(archs="aarch64", oslist="linux", + bugnumber="https://bugs.llvm.org/show_bug.cgi?id=27806") def test_step_over_load(self): self.setSvr4Support(False) self.run_step_over_load() @skipIfFreeBSD # llvm.org/pr14424 - missing FreeBSD Makefiles/testcase support @skipIfWindows # Windows doesn't have dlopen and friends, dynamic libraries work differently + @expectedFailureAll(archs="aarch64", oslist="linux", + bugnumber="https://bugs.llvm.org/show_bug.cgi?id=27806") def test_step_over_load_with_svr4(self): self.setSvr4Support(True) self.run_step_over_load() diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/Makefile index 1adf3fc44a694..a49ffa84c5478 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/Makefile @@ -1,5 +1,5 @@ LEVEL = ../../../make CXX_SOURCES := main.cpp include $(LEVEL)/Makefile.rules -CXXFLAGS_EXTRAS := -O1 -glldb -Xclang -femit-debug-entry-values +CXXFLAGS_EXTRAS := -O2 -glldb -Xclang -femit-debug-entry-values include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/TestBasicEntryValuesX86_64.py b/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/TestBasicEntryValuesX86_64.py index 1192c2b672f6d..e0285e6d626d8 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/TestBasicEntryValuesX86_64.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/TestBasicEntryValuesX86_64.py @@ -6,8 +6,7 @@ supported_platforms.extend(lldbplatformutil.getDarwinOSTriples()) lldbinline.MakeInlineTest(__file__, globals(), - [decorators.skipIf(bugnumber="llvm.org/pr44059"), - 
decorators.skipUnlessPlatform(supported_platforms),
+                         [decorators.skipUnlessPlatform(supported_platforms),
                          decorators.skipIf(compiler="clang", compiler_version=['<', '10.0']),
                          decorators.skipUnlessArch('x86_64'),
                          decorators.skipUnlessHasCallSiteInfo,
diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/main.cpp b/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/main.cpp
index ff72a81c6b293..9aac6e947838e 100644
--- a/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/main.cpp
+++ b/lldb/packages/Python/lldbsuite/test/functionalities/param_entry_vals/basic_entry_values_x86_64/main.cpp
@@ -18,6 +18,14 @@ template <typename T> __attribute__((noinline)) void use(T x) {
       /* Clobbers */ : "rsi" \
   );
 
+// Destroy %rbx in the current frame.
+#define DESTROY_RBX \
+  asm volatile ("xorq %%rbx, %%rbx" \
+      /* Outputs */ : \
+      /* Inputs */ : \
+      /* Clobbers */ : "rbx" \
+  );
+
 struct S1 {
   int field1 = 123;
   int *field2 = &field1;
@@ -30,10 +38,17 @@ void func1(int &sink, int x) {
   // Destroy 'x' in the current frame.
   DESTROY_RSI;
 
-  //% self.filecheck("image lookup -va $pc", "main.cpp", "-check-prefix=FUNC1-DESC")
-  // FUNC1-DESC: name = "x", type = "int", location = DW_OP_entry_value(DW_OP_reg4 RSI)
+  // NOTE: Currently, we do not generate DW_OP_entry_value for the 'x',
+  // since it gets copied into a register that is not callee saved,
+  // and we can not guarantee that its value has not changed.
 
   ++sink;
+
+  // Destroy 'sink' in the current frame.
+  DESTROY_RBX;
+
+  //% self.filecheck("image lookup -va $pc", "main.cpp", "-check-prefix=FUNC1-DESC")
+  // FUNC1-DESC: name = "sink", type = "int &", location = DW_OP_entry_value(DW_OP_reg5 RDI)
 }
 
 __attribute__((noinline))
@@ -43,10 +58,16 @@ void func2(int &sink, int x) {
   // Destroy 'x' in the current frame.
   DESTROY_RSI;
 
-  //% self.filecheck("expr x", "main.cpp", "-check-prefix=FUNC2-EXPR")
-  // FUNC2-EXPR: (int) ${{.*}} = 123
+  //% self.filecheck("expr x", "main.cpp", "-check-prefix=FUNC2-EXPR-FAIL", expect_cmd_failure=True)
+  // FUNC2-EXPR-FAIL: couldn't get the value of variable x: variable not available
 
   ++sink;
+
+  // Destroy 'sink' in the current frame.
+  DESTROY_RBX;
+
+  //% self.filecheck("expr sink", "main.cpp", "-check-prefix=FUNC2-EXPR")
+  // FUNC2-EXPR: ${{.*}} = 2
 }
 
 __attribute__((noinline))
@@ -69,10 +90,16 @@ void func4_amb(int &sink, int x) {
   // Destroy 'x' in the current frame.
   DESTROY_RSI;
 
-  //% self.filecheck("expr x", "main.cpp", "-check-prefix=FUNC4-EXPR", expect_cmd_failure=True)
-  // FUNC4-EXPR: couldn't get the value of variable x: Could not evaluate DW_OP_entry_value.
+  //% self.filecheck("expr x", "main.cpp", "-check-prefix=FUNC4-EXPR-FAIL", expect_cmd_failure=True)
+  // FUNC4-EXPR-FAIL: couldn't get the value of variable x: variable not available
 
   ++sink;
+
+  // Destroy 'sink' in the current frame.
+  DESTROY_RBX;
+
+  //% self.filecheck("expr sink", "main.cpp", "-check-prefix=FUNC4-EXPR", expect_cmd_failure=True)
+  // FUNC4-EXPR: couldn't get the value of variable sink: Could not evaluate DW_OP_entry_value.
 }
 
 __attribute__((noinline))
@@ -98,10 +125,16 @@ void func7(int &sink, int x) {
   // Destroy 'x' in the current frame.
DESTROY_RSI; - //% self.filecheck("expr x", "main.cpp", "-check-prefix=FUNC7-EXPR") - // FUNC7-EXPR: (int) ${{.*}} = 123 + //% self.filecheck("expr x", "main.cpp", "-check-prefix=FUNC7-EXPR-FAIL", expect_cmd_failure=True) + // FUNC7-EXPR-FAIL: couldn't get the value of variable x: variable not available ++sink; + + // Destroy 'sink' in the current frame. + DESTROY_RBX; + + //% self.filecheck("expr sink", "main.cpp", "-check-prefix=FUNC7-EXPR") + // FUNC7-EXPR: ${{.*}} = 4 } __attribute__((always_inline)) @@ -129,10 +162,16 @@ void func11_tailcalled(int &sink, int x) { // Destroy 'x' in the current frame. DESTROY_RSI; - //% self.filecheck("expr x", "main.cpp", "-check-prefix=FUNC11-EXPR") - // FUNC11-EXPR: (int) ${{.*}} = 123 + //% self.filecheck("expr x", "main.cpp", "-check-prefix=FUNC11-EXPR-FAIL", expect_cmd_failure=True) + // FUNC11-EXPR-FAIL: couldn't get the value of variable x: variable not available ++sink; + + // Destroy 'sink' in the current frame. + DESTROY_RBX; + + //% self.filecheck("expr sink", "main.cpp", "-check-prefix=FUNC11-EXPR") + // FUNC11-EXPR: ${{.*}} = 5 } __attribute__((noinline)) @@ -150,10 +189,16 @@ void func13(int &sink, int x) { // Destroy 'x' in the current frame. DESTROY_RSI; - //% self.filecheck("expr x", "main.cpp", "-check-prefix=FUNC13-EXPR") - // FUNC13-EXPR: (int) ${{.*}} = 123 + //% self.filecheck("expr x", "main.cpp", "-check-prefix=FUNC13-EXPR-FAIL", expect_cmd_failure=True) + // FUNC13-EXPR-FAIL: couldn't get the value of variable x: variable not available - ++sink; + use(sink); + + // Destroy 'sink' in the current frame. + DESTROY_RBX; + + //% self.filecheck("expr sink", "main.cpp", "-check-prefix=FUNC13-EXPR") + // FUNC13-EXPR: ${{.*}} = 5 } __attribute__((noinline, disable_tail_calls)) diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/return-value/TestReturnValue.py b/lldb/packages/Python/lldbsuite/test/functionalities/return-value/TestReturnValue.py index cb5dad50df8f7..e84bbc3c245d6 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/return-value/TestReturnValue.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/return-value/TestReturnValue.py @@ -19,6 +19,9 @@ def affected_by_pr33042(self): return ("clang" in self.getCompiler() and self.getArchitecture() == "aarch64" and self.getPlatform() == "linux") + def affected_by_pr44132(self): + return (self.getArchitecture() == "aarch64" and self.getPlatform() == "linux") + # ABIMacOSX_arm can't fetch simple values inside a structure def affected_by_radar_34562999(self): return (self.getArchitecture() == 'armv7' or self.getArchitecture() == 'armv7k') and self.platformIsDarwin() @@ -123,7 +126,7 @@ def test_with_python(self): #self.assertTrue(in_float == return_float) - if not self.affected_by_radar_34562999(): + if not self.affected_by_radar_34562999() and not self.affected_by_pr44132(): self.return_and_test_struct_value("return_one_int") self.return_and_test_struct_value("return_two_int") self.return_and_test_struct_value("return_three_int") @@ -182,10 +185,12 @@ def test_vector_values(self): self.return_and_test_struct_value("return_vector_size_float32_8") self.return_and_test_struct_value("return_vector_size_float32_16") - self.return_and_test_struct_value("return_vector_size_float32_32") + if not self.affected_by_pr44132(): + self.return_and_test_struct_value("return_vector_size_float32_32") self.return_and_test_struct_value("return_ext_vector_size_float32_2") self.return_and_test_struct_value("return_ext_vector_size_float32_4") - 
self.return_and_test_struct_value("return_ext_vector_size_float32_8") + if not self.affected_by_pr44132(): + self.return_and_test_struct_value("return_ext_vector_size_float32_8") # limit the nested struct and class tests to only x86_64 @skipIf(archs=no_match(['x86_64'])) diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/show_location/TestShowLocationDwarf5.py b/lldb/packages/Python/lldbsuite/test/functionalities/show_location/TestShowLocationDwarf5.py index a56282efd77db..1d4bc6f134500 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/show_location/TestShowLocationDwarf5.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/show_location/TestShowLocationDwarf5.py @@ -9,6 +9,8 @@ class TestTargetSourceMap(TestBase): mydir = TestBase.compute_mydir(__file__) + @skipIf(archs="aarch64", oslist="linux", + bugnumber="https://bugs.llvm.org/show_bug.cgi?id=44180") def test_source_map(self): # Set the target soure map to map "./" to the current test directory. yaml_path = os.path.join(self.getSourceDir(), "a.yaml") diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/step-avoids-no-debug/TestStepNoDebug.py b/lldb/packages/Python/lldbsuite/test/functionalities/step-avoids-no-debug/TestStepNoDebug.py index c4fae7da0135a..ceee901fe3063 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/step-avoids-no-debug/TestStepNoDebug.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/step-avoids-no-debug/TestStepNoDebug.py @@ -50,6 +50,8 @@ def test_step_over_with_python(self): archs=["i386"], bugnumber="llvm.org/pr28549") @expectedFailureAll(oslist=["ios", "tvos", "bridgeos"], bugnumber="") # lldb doesn't step past last source line in function on arm64 + @expectedFailureAll(archs=["aarch64"], oslist=["linux"], + bugnumber="llvm.org/pr44057") def test_step_in_with_python(self): """Test stepping in using avoid-no-debug with dwarf.""" self.build() diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq1/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq1/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq1/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq1/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq2/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq2/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq2/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq2/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_call_site/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_call_site/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_call_site/Makefile +++ 
b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_call_site/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_paths_to_common_sink/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_paths_to_common_sink/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_paths_to_common_sink/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_paths_to_common_sink/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_tail_call_seq/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_tail_call_seq/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_tail_call_seq/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_tail_call_seq/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/inlining_and_tail_calls/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/inlining_and_tail_calls/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/inlining_and_tail_calls/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/inlining_and_tail_calls/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/sbapi_support/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/sbapi_support/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/sbapi_support/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/sbapi_support/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_message/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_message/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_message/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_message/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- 
a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/TestSteppingOutWithArtificialFrames.py b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/TestSteppingOutWithArtificialFrames.py index 2b432e56a7405..687fb0e7a5e86 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/TestSteppingOutWithArtificialFrames.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/TestSteppingOutWithArtificialFrames.py @@ -71,6 +71,8 @@ def test_stepping_out_past_artificial_frame(self): self.assertFalse(frame2.IsArtificial()) @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr26265") + @expectedFailureAll(archs=["aarch64"], oslist=["linux"], + bugnumber="llvm.org/pr44160") def test_return_past_artificial_frame(self): self.build() thread = self.prepare_thread() diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/unambiguous_sequence/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/unambiguous_sequence/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/unambiguous_sequence/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/unambiguous_sequence/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/lang/c/step-target/TestStepTarget.py b/lldb/packages/Python/lldbsuite/test/lang/c/step-target/TestStepTarget.py index c694bda97c282..b3786fb94454f 100644 --- a/lldb/packages/Python/lldbsuite/test/lang/c/step-target/TestStepTarget.py +++ b/lldb/packages/Python/lldbsuite/test/lang/c/step-target/TestStepTarget.py @@ -32,7 +32,7 @@ def get_to_start(self): break_in_main = target.BreakpointCreateBySourceRegex( 'Break here to try targetted stepping', self.main_source_spec) self.assertTrue(break_in_main, VALID_BREAKPOINT) - self.assertTrue(break_in_main.GetNumLocations() > 0, "Has locations.") + self.assertGreater(break_in_main.GetNumLocations(), 0, "Has locations.") # Now launch the process, and do not stop at entry point. 
process = target.LaunchSimple( @@ -60,7 +60,7 @@ def test_with_end_line(self): thread.StepInto("lotsOfArgs", self.end_line, error) frame = thread.frames[0] - self.assertTrue(frame.name == "lotsOfArgs", "Stepped to lotsOfArgs.") + self.assertEqual(frame.name, "lotsOfArgs", "Stepped to lotsOfArgs.") @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr32343") def test_with_end_line_bad_name(self): @@ -71,8 +71,7 @@ def test_with_end_line_bad_name(self): error = lldb.SBError() thread.StepInto("lotsOfArgssss", self.end_line, error) frame = thread.frames[0] - self.assertTrue( - frame.line_entry.line == self.end_line, + self.assertEqual(frame.line_entry.line, self.end_line, "Stepped to the block end.") @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr32343") @@ -84,7 +83,7 @@ def test_with_end_line_deeper(self): error = lldb.SBError() thread.StepInto("modifyInt", self.end_line, error) frame = thread.frames[0] - self.assertTrue(frame.name == "modifyInt", "Stepped to modifyInt.") + self.assertEqual(frame.name, "modifyInt", "Stepped to modifyInt.") @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr32343") def test_with_command_and_block(self): @@ -100,7 +99,7 @@ def test_with_command_and_block(self): "thread step-in command succeeded.") frame = thread.frames[0] - self.assertTrue(frame.name == "lotsOfArgs", "Stepped to lotsOfArgs.") + self.assertEqual(frame.name, "lotsOfArgs", "Stepped to lotsOfArgs.") @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr32343") def test_with_command_and_block_and_bad_name(self): @@ -117,9 +116,8 @@ def test_with_command_and_block_and_bad_name(self): frame = thread.frames[0] - self.assertTrue(frame.name == "main", "Stepped back out to main.") + self.assertEqual(frame.name, "main", "Stepped back out to main.") # end_line is set to the line after the containing block. 
Check that # we got there: - self.assertTrue( - frame.line_entry.line == self.end_line, + self.assertEqual(frame.line_entry.line, self.end_line, "Got out of the block") diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/trivial_abi/TestTrivialABI.py b/lldb/packages/Python/lldbsuite/test/lang/cpp/trivial_abi/TestTrivialABI.py index 9a203ef3a3088..78f7fa3afd73b 100644 --- a/lldb/packages/Python/lldbsuite/test/lang/cpp/trivial_abi/TestTrivialABI.py +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/trivial_abi/TestTrivialABI.py @@ -18,6 +18,8 @@ class TestTrivialABI(TestBase): @skipUnlessSupportedTypeAttribute("trivial_abi") @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr37995") + @expectedFailureAll(archs=["aarch64"], oslist=["linux"], + bugnumber="llvm.org/pr44161") def test_call_trivial(self): """Test that we can print a variable & call a function with a trivial ABI class.""" self.build() @@ -27,6 +29,8 @@ def test_call_trivial(self): @skipUnlessSupportedTypeAttribute("trivial_abi") # fixed for SysV-x86_64 ABI, but not Windows-x86_64 @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr36870") + @expectedFailureAll(archs=["aarch64"], oslist=["linux"], + bugnumber="llvm.org/pr44161") def test_call_nontrivial(self): """Test that we can print a variable & call a function on the same class w/o the trivial ABI marker.""" self.build() diff --git a/lldb/packages/Python/lldbsuite/test/linux/builtin_trap/TestBuiltinTrap.py b/lldb/packages/Python/lldbsuite/test/linux/builtin_trap/TestBuiltinTrap.py index 951f59e611e2f..28debcee9da1a 100644 --- a/lldb/packages/Python/lldbsuite/test/linux/builtin_trap/TestBuiltinTrap.py +++ b/lldb/packages/Python/lldbsuite/test/linux/builtin_trap/TestBuiltinTrap.py @@ -24,7 +24,7 @@ def setUp(self): # gcc generates incorrect linetable @expectedFailureAll(archs="arm", compiler="gcc", triple=".*-android") - @expectedFailureAll(oslist=['linux'], archs=['arm']) + @expectedFailureAll(oslist=['linux'], archs=['arm', 'aarch64']) @skipIfWindows def test_with_run_command(self): """Test that LLDB handles a function with __builtin_trap correctly.""" diff --git a/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py b/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py index 1bc52b3e66769..5c87d74e22d22 100644 --- a/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py +++ b/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py @@ -68,17 +68,17 @@ def cleanup(): self.expect("frame variable foo.E", substrs=['b8cca70a']) - format.format = lldb.eFormatOctal + format.SetFormat(lldb.eFormatOctal) category.AddTypeFormat(lldb.SBTypeNameSpecifier("int"), format) self.expect("frame variable foo.A", - substrs=['01']) + substrs=[' 01']) self.expect("frame variable foo.E", substrs=['b8cca70a']) category.DeleteTypeFormat(lldb.SBTypeNameSpecifier("int")) category.DeleteTypeFormat(lldb.SBTypeNameSpecifier("long")) self.expect("frame variable foo.A", matching=False, - substrs=['01']) + substrs=[' 01']) self.expect("frame variable foo.E", matching=False, substrs=['b8cca70a']) @@ -90,10 +90,13 @@ def cleanup(): new_category.IsValid(), "getting a non-existing category worked") new_category = self.dbg.CreateCategory("foobar") - new_category.enabled = True + new_category.SetEnabled(True) new_category.AddTypeSummary( lldb.SBTypeNameSpecifier( - "^.*t$", True), summary) + "^.*t$", + True, # is_regexp + ), summary) + self.expect("frame variable foo.A", 
substrs=['hello world']) self.expect("frame variable foo.E", matching=False, @@ -102,7 +105,7 @@ def cleanup(): substrs=['hello world']) self.expect("frame variable foo.F", substrs=['hello world']) - new_category.enabled = False + new_category.SetEnabled(False) self.expect("frame variable foo.A", matching=False, substrs=['hello world']) self.expect("frame variable foo.E", matching=False, @@ -379,7 +382,7 @@ def cleanup(): lldb.SBTypeSummary.CreateWithScriptCode("return 'hello scripted world';")) self.expect("frame variable foo", matching=False, substrs=['hello scripted world']) - new_category.enabled = True + new_category.SetEnabled(True) self.expect("frame variable foo", matching=True, substrs=['hello scripted world']) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py index 7a39079b472a8..2b7f28a3aefbc 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py @@ -675,7 +675,6 @@ def test_Hg_switches_to_3_threads_launch_debugserver(self): self.Hg_switches_to_3_threads() @expectedFailureAll(oslist=["windows"]) # expect 4 threads - @expectedFailureNetBSD @llgs_test def test_Hg_switches_to_3_threads_launch_llgs(self): self.init_llgs_test() @@ -1583,7 +1582,6 @@ def test_P_and_p_thread_suffix_work_debugserver(self): self.P_and_p_thread_suffix_work() @skipIfWindows - @expectedFailureNetBSD @llgs_test def test_P_and_p_thread_suffix_work_llgs(self): self.init_llgs_test() diff --git a/lldb/source/API/SBCompileUnit.cpp b/lldb/source/API/SBCompileUnit.cpp index 581bda3635073..d52040d850a95 100644 --- a/lldb/source/API/SBCompileUnit.cpp +++ b/lldb/source/API/SBCompileUnit.cpp @@ -50,7 +50,7 @@ SBFileSpec SBCompileUnit::GetFileSpec() const { SBFileSpec file_spec; if (m_opaque_ptr) - file_spec.SetFileSpec(*m_opaque_ptr); + file_spec.SetFileSpec(m_opaque_ptr->GetPrimaryFile()); return LLDB_RECORD_RESULT(file_spec); } @@ -106,7 +106,7 @@ uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t line, if (inline_file_spec && inline_file_spec->IsValid()) file_spec = inline_file_spec->ref(); else - file_spec = *m_opaque_ptr; + file_spec = m_opaque_ptr->GetPrimaryFile(); index = m_opaque_ptr->FindLineEntry( start_idx, line, inline_file_spec ? 
inline_file_spec->get() : nullptr, diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp index 82dc60489008c..090a3a57a2f4a 100644 --- a/lldb/source/API/SBDebugger.cpp +++ b/lldb/source/API/SBDebugger.cpp @@ -294,7 +294,7 @@ void SBDebugger::SetInputFileHandle(FILE *fh, bool transfer_ownership) { SBError SBDebugger::SetInputFile(FileSP file_sp) { LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputFile, (FileSP), file_sp); - return SetInputFile(SBFile(file_sp)); + return LLDB_RECORD_RESULT(SetInputFile(SBFile(file_sp))); } // Shouldn't really be settable after initialization as this could cause lots @@ -306,7 +306,7 @@ SBError SBDebugger::SetInputFile(SBFile file) { SBError error; if (!m_opaque_sp) { error.ref().SetErrorString("invalid debugger"); - return error; + return LLDB_RECORD_RESULT(error); } repro::DataRecorder *recorder = nullptr; @@ -330,16 +330,16 @@ SBError SBDebugger::SetInputFile(SBFile file) { if (!file_sp || !file_sp->IsValid()) { error.ref().SetErrorString("invalid file"); - return error; + return LLDB_RECORD_RESULT(error); } m_opaque_sp->SetInputFile(file_sp, recorder); - return error; + return LLDB_RECORD_RESULT(error); } SBError SBDebugger::SetOutputFile(FileSP file_sp) { LLDB_RECORD_METHOD(SBError, SBDebugger, SetOutputFile, (FileSP), file_sp); - return SetOutputFile(SBFile(file_sp)); + return LLDB_RECORD_RESULT(SetOutputFile(SBFile(file_sp))); } void SBDebugger::SetOutputFileHandle(FILE *fh, bool transfer_ownership) { @@ -353,14 +353,14 @@ SBError SBDebugger::SetOutputFile(SBFile file) { SBError error; if (!m_opaque_sp) { error.ref().SetErrorString("invalid debugger"); - return error; + return LLDB_RECORD_RESULT(error); } if (!file) { error.ref().SetErrorString("invalid file"); - return error; + return LLDB_RECORD_RESULT(error); } m_opaque_sp->SetOutputFile(file.m_opaque_sp); - return error; + return LLDB_RECORD_RESULT(error); } void SBDebugger::SetErrorFileHandle(FILE *fh, bool transfer_ownership) { @@ -371,7 +371,7 @@ void SBDebugger::SetErrorFileHandle(FILE *fh, bool transfer_ownership) { SBError SBDebugger::SetErrorFile(FileSP file_sp) { LLDB_RECORD_METHOD(SBError, SBDebugger, SetErrorFile, (FileSP), file_sp); - return SetErrorFile(SBFile(file_sp)); + return LLDB_RECORD_RESULT(SetErrorFile(SBFile(file_sp))); } SBError SBDebugger::SetErrorFile(SBFile file) { @@ -379,14 +379,14 @@ SBError SBDebugger::SetErrorFile(SBFile file) { SBError error; if (!m_opaque_sp) { error.ref().SetErrorString("invalid debugger"); - return error; + return LLDB_RECORD_RESULT(error); } if (!file) { error.ref().SetErrorString("invalid file"); - return error; + return LLDB_RECORD_RESULT(error); } m_opaque_sp->SetErrorFile(file.m_opaque_sp); - return error; + return LLDB_RECORD_RESULT(error); } FILE *SBDebugger::GetInputFileHandle() { @@ -395,7 +395,7 @@ FILE *SBDebugger::GetInputFileHandle() { File &file_sp = m_opaque_sp->GetInputFile(); return LLDB_RECORD_RESULT(file_sp.GetStream()); } - return nullptr; + return LLDB_RECORD_RESULT(nullptr); } SBFile SBDebugger::GetInputFile() { @@ -412,7 +412,7 @@ FILE *SBDebugger::GetOutputFileHandle() { StreamFile &stream_file = m_opaque_sp->GetOutputStream(); return LLDB_RECORD_RESULT(stream_file.GetFile().GetStream()); } - return nullptr; + return LLDB_RECORD_RESULT(nullptr); } SBFile SBDebugger::GetOutputFile() { @@ -431,7 +431,7 @@ FILE *SBDebugger::GetErrorFileHandle() { StreamFile &stream_file = m_opaque_sp->GetErrorStream(); return LLDB_RECORD_RESULT(stream_file.GetFile().GetStream()); } - return nullptr; + return 
LLDB_RECORD_RESULT(nullptr); } SBFile SBDebugger::GetErrorFile() { diff --git a/lldb/source/API/SBFile.cpp b/lldb/source/API/SBFile.cpp index f5a38efe4a779..277402f31abf7 100644 --- a/lldb/source/API/SBFile.cpp +++ b/lldb/source/API/SBFile.cpp @@ -100,24 +100,27 @@ SBError SBFile::Close() { SBFile::operator bool() const { LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFile, operator bool); - return LLDB_RECORD_RESULT(IsValid()); + return IsValid(); } bool SBFile::operator!() const { LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFile, operator!); - return LLDB_RECORD_RESULT(!IsValid()); + return !IsValid(); } FileSP SBFile::GetFile() const { LLDB_RECORD_METHOD_CONST_NO_ARGS(FileSP, SBFile, GetFile); - return m_opaque_sp; + return LLDB_RECORD_RESULT(m_opaque_sp); } namespace lldb_private { namespace repro { template <> void RegisterMethods<SBFile>(Registry &R) { - + LLDB_REGISTER_CONSTRUCTOR(SBFile, ()); + LLDB_REGISTER_CONSTRUCTOR(SBFile, (FileSP)); + LLDB_REGISTER_CONSTRUCTOR(SBFile, (FILE *, bool)); + LLDB_REGISTER_CONSTRUCTOR(SBFile, (int, const char *, bool)); LLDB_REGISTER_METHOD(lldb::SBError, SBFile, Flush, ()); LLDB_REGISTER_METHOD_CONST(bool, SBFile, IsValid, ()); LLDB_REGISTER_METHOD_CONST(bool, SBFile, operator bool,()); diff --git a/lldb/source/API/SBFileSpec.cpp b/lldb/source/API/SBFileSpec.cpp index 2f910b9ba294e..2e7eba42bc909 100644 --- a/lldb/source/API/SBFileSpec.cpp +++ b/lldb/source/API/SBFileSpec.cpp @@ -143,7 +143,7 @@ void SBFileSpec::SetDirectory(const char *directory) { } uint32_t SBFileSpec::GetPath(char *dst_path, size_t dst_len) const { - LLDB_RECORD_METHOD_CONST(uint32_t, SBFileSpec, GetPath, (char *, size_t), + LLDB_RECORD_DUMMY(uint32_t, SBFileSpec, GetPath, (char *, size_t), dst_path, dst_len); uint32_t result = m_opaque_up->GetPath(dst_path, dst_len); diff --git a/lldb/source/API/SBModule.cpp b/lldb/source/API/SBModule.cpp index 7ac189bb42737..4e9dfb0c1e62e 100644 --- a/lldb/source/API/SBModule.cpp +++ b/lldb/source/API/SBModule.cpp @@ -245,7 +245,7 @@ bool SBModule::GetDescription(SBStream &description) { ModuleSP module_sp(GetSP()); if (module_sp) { - module_sp->GetDescription(&strm); + module_sp->GetDescription(strm.AsRawOstream()); } else strm.PutCString("No value"); diff --git a/lldb/source/API/SBReproducer.cpp b/lldb/source/API/SBReproducer.cpp index d50d95ebb5476..1107161a419f1 100644 --- a/lldb/source/API/SBReproducer.cpp +++ b/lldb/source/API/SBReproducer.cpp @@ -22,8 +22,8 @@ #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBHostOS.h" #include "lldb/API/SBReproducer.h" - #include "lldb/Host/FileSystem.h" +#include "lldb/lldb-private.h" using namespace lldb; using namespace lldb_private; @@ -124,7 +124,7 @@ const char *SBReproducer::Capture(const char *path) { return nullptr; } -const char *SBReproducer::Replay(const char *path) { +const char *SBReproducer::Replay(const char *path, bool skip_version_check) { static std::string error; if (auto e = Reproducer::Initialize(ReproducerMode::Replay, FileSpec(path))) { error = llvm::toString(std::move(e)); @@ -137,6 +137,22 @@ return error.c_str(); } + if (!skip_version_check) { + llvm::Expected<std::string> version = loader->LoadBuffer<VersionProvider>(); + if (!version) { + error = llvm::toString(version.takeError()); + return error.c_str(); + } + if (lldb_private::GetVersion() != llvm::StringRef(*version).rtrim()) { + error = "reproducer capture and replay version don't match:\n"; + error.append("reproducer captured with:\n"); + error.append(*version); + error.append("reproducer replayed with:\n"); + error.append(lldb_private::GetVersion()); + return error.c_str(); + } + } + FileSpec file = loader->GetFile(); if (!file) { error = "unable to get replay data from reproducer.";
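// Annotation, not part of the patch: the Replay hunk above now refuses to
// replay a reproducer against a different lldb than the one that captured it.
// A minimal sketch of that gate as a hypothetical standalone helper (uses
// llvm/Support/Error.h; the real check lives inline in SBReproducer::Replay):
static llvm::Error CheckReplayVersion(llvm::StringRef captured,
                                      llvm::StringRef current) {
  // The captured version buffer carries a trailing newline, hence the rtrim().
  if (current != captured.rtrim())
    return llvm::createStringError(
        llvm::inconvertibleErrorCode(),
        "reproducer capture and replay version don't match");
  return llvm::Error::success();
}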
diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index 7013e2b45e5ff..312e4df758631 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -1176,12 +1176,15 @@ bool SBTarget::FindBreakpointsByName(const char *name, TargetSP target_sp(GetSP()); if (target_sp) { std::lock_guard<std::recursive_mutex> guard(target_sp->GetAPIMutex()); - BreakpointList bkpt_list(false); - bool is_valid = - target_sp->GetBreakpointList().FindBreakpointsByName(name, bkpt_list); - if (!is_valid) + llvm::Expected<std::vector<BreakpointSP>> expected_vector = + target_sp->GetBreakpointList().FindBreakpointsByName(name); + if (!expected_vector) { + LLDB_LOG(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_BREAKPOINTS), + "invalid breakpoint name: {}", + llvm::toString(expected_vector.takeError())); return false; - for (BreakpointSP bkpt_sp : bkpt_list.Breakpoints()) { + } + for (BreakpointSP bkpt_sp : *expected_vector) { bkpts.AppendByID(bkpt_sp->GetID()); } } diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index 8d4930bf6edb0..f7f748f568321 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -914,9 +914,10 @@ SBError SBThread::StepOverUntil(lldb::SBFrame &sb_frame, const bool exact = false; SymbolContextList sc_list; - const uint32_t num_matches = frame_sc.comp_unit->ResolveSymbolContext( - step_file_spec, line, check_inlines, exact, eSymbolContextLineEntry, - sc_list); + frame_sc.comp_unit->ResolveSymbolContext(step_file_spec, line, + check_inlines, exact, + eSymbolContextLineEntry, sc_list); + const uint32_t num_matches = sc_list.GetSize(); if (num_matches > 0) { SymbolContext sc; for (uint32_t i = 0; i < num_matches; ++i) { @@ -1036,7 +1037,7 @@ SBError SBThread::JumpToLine(lldb::SBFileSpec &file_spec, uint32_t line) { Thread *thread = exe_ctx.GetThreadPtr(); - Status err = thread->JumpToLine(file_spec.get(), line, true); + Status err = thread->JumpToLine(file_spec.ref(), line, true); sb_error.SetError(err); return LLDB_RECORD_RESULT(sb_error); }
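// Annotation, not part of the patch: FindBreakpointsByName (changed in the
// BreakpointList.cpp hunk below) now returns
// llvm::Expected<std::vector<BreakpointSP>> instead of filling an out-list and
// returning bool. A hypothetical caller, mirroring the SBTarget hunk above,
// must check the Expected and consume its error exactly once:
static size_t CountBreakpointsNamed(BreakpointList &list, const char *name) {
  llvm::Expected<std::vector<BreakpointSP>> matches =
      list.FindBreakpointsByName(name);
  if (!matches) {
    // An llvm::Error destroyed unconsumed aborts; log or consume it.
    llvm::consumeError(matches.takeError());
    return 0;
  }
  return matches->size();
}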
diff --git a/lldb/source/Breakpoint/Breakpoint.cpp b/lldb/source/Breakpoint/Breakpoint.cpp index a112542803c47..13acf4bb92e20 100644 --- a/lldb/source/Breakpoint/Breakpoint.cpp +++ b/lldb/source/Breakpoint/Breakpoint.cpp @@ -638,7 +638,8 @@ static bool SymbolContextsMightBeEquivalent(SymbolContext &old_sc, } else { // Otherwise we will compare by name... if (old_sc.comp_unit && new_sc.comp_unit) { - if (FileSpec::Equal(*old_sc.comp_unit, *new_sc.comp_unit, true)) { + if (old_sc.comp_unit->GetPrimaryFile() == + new_sc.comp_unit->GetPrimaryFile()) { // Now check the functions: if (old_sc.function && new_sc.function && (old_sc.function->GetName() == new_sc.function->GetName())) { diff --git a/lldb/source/Breakpoint/BreakpointList.cpp b/lldb/source/Breakpoint/BreakpointList.cpp index c80fb917b4903..5b23c633d14c6 100644 --- a/lldb/source/Breakpoint/BreakpointList.cpp +++ b/lldb/source/Breakpoint/BreakpointList.cpp @@ -10,6 +10,8 @@ #include "lldb/Target/Target.h" +#include "llvm/Support/Errc.h" + using namespace lldb; using namespace lldb_private; @@ -128,22 +130,24 @@ BreakpointSP BreakpointList::FindBreakpointByID(break_id_t break_id) const { return {}; } -bool BreakpointList::FindBreakpointsByName(const char *name, - BreakpointList &matching_bps) { - Status error; +llvm::Expected<std::vector<BreakpointSP>> +BreakpointList::FindBreakpointsByName(const char *name) { if (!name) - return false; + return llvm::createStringError(llvm::errc::invalid_argument, + "FindBreakpointsByName requires a name"); + Status error; if (!BreakpointID::StringIsBreakpointName(llvm::StringRef(name), error)) - return false; + return error.ToError(); + std::vector<BreakpointSP> matching_bps; for (BreakpointSP bkpt_sp : Breakpoints()) { if (bkpt_sp->MatchesName(name)) { - matching_bps.Add(bkpt_sp, false); + matching_bps.push_back(bkpt_sp); } } - return true; + return matching_bps; } void BreakpointList::Dump(Stream *s) const { diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp index 46b8f25c56682..e6d7d85f90605 100644 --- a/lldb/source/Breakpoint/BreakpointLocation.cpp +++ b/lldb/source/Breakpoint/BreakpointLocation.cpp @@ -525,7 +525,7 @@ void BreakpointLocation::GetDescription(Stream *s, if (sc.comp_unit != nullptr) { s->EOL(); s->Indent("compile unit = "); - static_cast<FileSpec *>(sc.comp_unit)->GetFilename().Dump(s); + sc.comp_unit->GetPrimaryFile().GetFilename().Dump(s); if (sc.function != nullptr) { s->EOL(); diff --git a/lldb/source/Breakpoint/BreakpointOptions.cpp b/lldb/source/Breakpoint/BreakpointOptions.cpp index 0d4c6173c3c54..8fd16f420c04f 100644 --- a/lldb/source/Breakpoint/BreakpointOptions.cpp +++ b/lldb/source/Breakpoint/BreakpointOptions.cpp @@ -566,7 +566,8 @@ void BreakpointOptions::GetDescription(Stream *s, if (m_callback_baton_sp.get()) { if (level != eDescriptionLevelBrief) { s->EOL(); - m_callback_baton_sp->GetDescription(s, level); + m_callback_baton_sp->GetDescription(s->AsRawOstream(), level, + s->GetIndentLevel()); } } if (!m_condition_text.empty()) { @@ -578,35 +579,33 @@ } void BreakpointOptions::CommandBaton::GetDescription( - Stream *s, lldb::DescriptionLevel level) const { + llvm::raw_ostream &s, lldb::DescriptionLevel level, + unsigned indentation) const { const CommandData *data = getItem(); if (level == eDescriptionLevelBrief) { - s->Printf(", commands = %s", - (data && data->user_source.GetSize() > 0) ? "yes" : "no"); + s << ", commands = " + << ((data && data->user_source.GetSize() > 0) ? "yes" : "no"); return; } - s->IndentMore(); - s->Indent("Breakpoint commands"); + indentation += 2; + s.indent(indentation); + s << "Breakpoint commands"; if (data->interpreter != eScriptLanguageNone) - s->Printf(" (%s):\n", - ScriptInterpreter::LanguageToString(data->interpreter).c_str()); + s << llvm::formatv(" ({0}):\n", + ScriptInterpreter::LanguageToString(data->interpreter)); else - s->PutCString(":\n"); + s << ":\n"; - s->IndentMore(); + indentation += 2; if (data && data->user_source.GetSize() > 0) { - const size_t num_strings = data->user_source.GetSize(); - for (size_t i = 0; i < num_strings; ++i) { - s->Indent(data->user_source.GetStringAtIndex(i)); - s->EOL(); + for (llvm::StringRef str : data->user_source) { + s.indent(indentation); + s << str << "\n"; } - } else { - s->PutCString("No commands.\n"); - } - s->IndentLess(); - s->IndentLess(); + } else + s << "No commands.\n"; } void BreakpointOptions::SetCommandDataCallback( diff --git a/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp b/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp index 3cb04263c6dcb..6b600a7cf128f 100644 --- a/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp @@ -102,7 +102,7 @@ Searcher::CallbackReturn BreakpointResolverFileRegex::SearchCallback( return eCallbackReturnContinue; CompileUnit *cu = context.comp_unit; - FileSpec cu_file_spec = *(static_cast<FileSpec *>(cu)); + FileSpec cu_file_spec = cu->GetPrimaryFile(); std::vector<uint32_t> line_matches; context.target_sp->GetSourceManager().FindLinesMatchingRegex( cu_file_spec, m_regex, 1, UINT32_MAX, line_matches); diff --git a/lldb/source/Breakpoint/WatchpointOptions.cpp b/lldb/source/Breakpoint/WatchpointOptions.cpp index cd5ef930e5dcf..026bf2f746aef 100644 --- a/lldb/source/Breakpoint/WatchpointOptions.cpp +++ b/lldb/source/Breakpoint/WatchpointOptions.cpp @@ -121,7 +121,8 @@ void WatchpointOptions::GetCallbackDescription( Stream *s, lldb::DescriptionLevel level) const { if (m_callback_baton_sp.get()) { s->EOL(); - m_callback_baton_sp->GetDescription(s, level); + m_callback_baton_sp->GetDescription(s->AsRawOstream(), level, + s->GetIndentLevel()); } } @@ -156,27 +157,26 @@ void WatchpointOptions::GetDescription(Stream *s, } void WatchpointOptions::CommandBaton::GetDescription( - Stream *s, lldb::DescriptionLevel level) const { + llvm::raw_ostream &s, lldb::DescriptionLevel level, + unsigned indentation) const { const CommandData *data = getItem(); if (level == eDescriptionLevelBrief) { - s->Printf(", commands = %s", - (data && data->user_source.GetSize() > 0) ? "yes" : "no"); + s << ", commands = " + << ((data && data->user_source.GetSize() > 0) ? "yes" : "no"); return; } - s->IndentMore(); - s->Indent("watchpoint commands:\n"); + indentation += 2; + s.indent(indentation); + s << "watchpoint commands:\n"; - s->IndentMore(); + indentation += 2; if (data && data->user_source.GetSize() > 0) { for (const std::string &line : data->user_source) { - s->Indent(line); - s->EOL(); + s.indent(indentation); + s << line << "\n"; } - } else { - s->PutCString("No commands.\n"); - } - s->IndentLess(); - s->IndentLess(); + } else + s << "No commands.\n"; }
diff --git a/lldb/source/Commands/CommandCompletions.cpp b/lldb/source/Commands/CommandCompletions.cpp index 469a6bbbadf65..b382e26e2b704 100644 --- a/lldb/source/Commands/CommandCompletions.cpp +++ b/lldb/source/Commands/CommandCompletions.cpp @@ -378,8 +378,10 @@ CommandCompletions::SourceFileCompleter::SearchCallback(SearchFilter &filter, } } } else { - const char *cur_file_name = context.comp_unit->GetFilename().GetCString(); - const char *cur_dir_name = context.comp_unit->GetDirectory().GetCString(); + const char *cur_file_name = + context.comp_unit->GetPrimaryFile().GetFilename().GetCString(); + const char *cur_dir_name = + context.comp_unit->GetPrimaryFile().GetDirectory().GetCString(); bool match = false; if (m_file_name && cur_file_name && @@ -391,7 +393,7 @@ CommandCompletions::SourceFileCompleter::SearchCallback(SearchFilter &filter, match = false; if (match) { - m_matching_files.AppendIfUnique(context.comp_unit); + m_matching_files.AppendIfUnique(context.comp_unit->GetPrimaryFile()); } } } @@ -411,10 +413,7 @@ void CommandCompletions::SourceFileCompleter::DoCompletion( // SymbolCompleter static bool regex_chars(const char comp) { - return (comp == '[' || comp == ']' || comp == '(' || comp == ')' || - comp == '{' || comp == '}' || comp == '+' || comp == '.' || - comp == '*' || comp == '|' || comp == '^' || comp == '$' || - comp == '\\' || comp == '?'); + return llvm::StringRef("[](){}+.*|^$\\?").contains(comp); } CommandCompletions::SymbolCompleter::SymbolCompleter( diff --git a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp index 1a4432149f731..a82e70a1cdaba 100644 --- a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp @@ -674,10 +674,10 @@ class CommandObjectBreakpointCommandList : public CommandObjectParsed { if (baton) { result.GetOutputStream().Printf("Breakpoint %s:\n", id_str.GetData()); - result.GetOutputStream().IndentMore(); - baton->GetDescription(&result.GetOutputStream(), - eDescriptionLevelFull); - result.GetOutputStream().IndentLess(); + baton->GetDescription(result.GetOutputStream().AsRawOstream(), + eDescriptionLevelFull, + result.GetOutputStream().GetIndentLevel() + + 2); } else { result.AppendMessageWithFormat( "Breakpoint %s does not have an associated command.\n", diff --git a/lldb/source/Commands/CommandObjectGUI.cpp b/lldb/source/Commands/CommandObjectGUI.cpp index fac2e96277839..898468a977f3f 100644 --- a/lldb/source/Commands/CommandObjectGUI.cpp +++ b/lldb/source/Commands/CommandObjectGUI.cpp @@ -8,6 +8,7 @@ #include "CommandObjectGUI.h" +#include "lldb/Core/IOHandlerCursesGUI.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/lldb-private.h" diff --git a/lldb/source/Commands/CommandObjectSource.cpp b/lldb/source/Commands/CommandObjectSource.cpp index fd1b158afb16d..807c04f4c65e5 100644 --- a/lldb/source/Commands/CommandObjectSource.cpp +++ b/lldb/source/Commands/CommandObjectSource.cpp @@ -146,12 +146,6 @@ class CommandObjectSourceInfo : public CommandObjectParsed { Target *target = m_exe_ctx.GetTargetPtr(); uint32_t num_matches = 0; - bool has_path = false; - if (file_spec) { - assert(file_spec.GetFilename().AsCString()); - has_path = (file_spec.GetDirectory().AsCString() != nullptr); - } - // Dump all the line entries for the file in the list.
ConstString last_module_file_name; uint32_t num_scs = sc_list.GetSize(); @@ -168,8 +162,7 @@ class CommandObjectSourceInfo : public CommandObjectParsed { if (module_list.GetSize() && module_list.GetIndexForModule(module) == LLDB_INVALID_INDEX32) continue; - if (file_spec && !lldb_private::FileSpec::Equal( - file_spec, line_entry.file, has_path)) + if (!FileSpec::Match(file_spec, line_entry.file)) continue; if (start_line > 0 && line_entry.line < start_line) continue; @@ -250,13 +243,13 @@ class CommandObjectSourceInfo : public CommandObjectParsed { num_matches++; if (num_lines > 0 && num_matches > num_lines) break; - assert(lldb_private::FileSpec::Equal(cu_file_spec, line_entry.file, - has_path)); + assert(cu_file_spec == line_entry.file); if (!cu_header_printed) { if (num_matches > 0) strm << "\n\n"; strm << "Lines found for file " << file_spec_name - << " in compilation unit " << cu->GetFilename() << " in `" + << " in compilation unit " + << cu->GetPrimaryFile().GetFilename() << " in `" << module_file_name << "\n"; cu_header_printed = true; } @@ -1077,7 +1070,8 @@ class CommandObjectSourceList : public CommandObjectParsed { if (m_options.show_bp_locs) { m_breakpoint_locations.Clear(); const bool show_inlines = true; - m_breakpoint_locations.Reset(*sc.comp_unit, 0, show_inlines); + m_breakpoint_locations.Reset(sc.comp_unit->GetPrimaryFile(), 0, + show_inlines); SearchFilterForUnconstrainedSearches target_search_filter( target->shared_from_this()); target_search_filter.Search(m_breakpoint_locations); @@ -1106,8 +1100,8 @@ class CommandObjectSourceList : public CommandObjectParsed { ? sc.line_entry.column : 0; target->GetSourceManager().DisplaySourceLinesWithLineNumbers( - sc.comp_unit, sc.line_entry.line, column, lines_to_back_up, - m_options.num_lines - lines_to_back_up, "->", + sc.comp_unit->GetPrimaryFile(), sc.line_entry.line, column, + lines_to_back_up, m_options.num_lines - lines_to_back_up, "->", &result.GetOutputStream(), GetBreakpointLocations()); result.SetStatus(eReturnStatusSuccessFinishResult); } @@ -1190,18 +1184,18 @@ class CommandObjectSourceList : public CommandObjectParsed { if (num_matches > 1) { bool got_multiple = false; - FileSpec *test_cu_spec = nullptr; + CompileUnit *test_cu = nullptr; for (unsigned i = 0; i < num_matches; i++) { SymbolContext sc; sc_list.GetContextAtIndex(i, sc); if (sc.comp_unit) { - if (test_cu_spec) { - if (test_cu_spec != static_cast<FileSpec *>(sc.comp_unit)) + if (test_cu) { + if (test_cu != sc.comp_unit) got_multiple = true; break; } else - test_cu_spec = sc.comp_unit; + test_cu = sc.comp_unit; } } if (got_multiple) { @@ -1218,7 +1212,8 @@ class CommandObjectSourceList : public CommandObjectParsed { if (sc.comp_unit) { if (m_options.show_bp_locs) { const bool show_inlines = true; - m_breakpoint_locations.Reset(*sc.comp_unit, 0, show_inlines); + m_breakpoint_locations.Reset(sc.comp_unit->GetPrimaryFile(), 0, + show_inlines); SearchFilterForUnconstrainedSearches target_search_filter( target->shared_from_this()); target_search_filter.Search(m_breakpoint_locations); @@ -1229,7 +1224,7 @@ class CommandObjectSourceList : public CommandObjectParsed { m_options.num_lines = 10; const uint32_t column = 0; target->GetSourceManager().DisplaySourceLinesWithLineNumbers( - sc.comp_unit, m_options.start_line, column, 0, + sc.comp_unit->GetPrimaryFile(), m_options.start_line, column, 0, m_options.num_lines, "", &result.GetOutputStream(), GetBreakpointLocations());
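// Annotation, not part of the patch: the pattern behind the
// CommandObjectSource hunks above (and the Breakpoint*/CommandCompletions
// hunks earlier). CompileUnit used to inherit from FileSpec, so code could
// dereference or static_cast a CompileUnit* as a file; with that inheritance
// gone, the file is an explicit member. Hypothetical before/after:
static FileSpec CompileUnitFile(CompileUnit *comp_unit) {
  // old (is-a):  return *static_cast<FileSpec *>(comp_unit);
  return comp_unit->GetPrimaryFile(); // new (has-a)
}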
diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index d77207bb82cfc..ac3188740234e 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -78,7 +78,7 @@ static void DumpTargetInfo(uint32_t target_idx, Target *target, uint32_t properties = 0; if (target_arch.IsValid()) { strm.Printf("%sarch=", properties++ > 0 ? ", " : " ( "); - target_arch.DumpTriple(strm); + target_arch.DumpTriple(strm.AsRawOstream()); properties++; } PlatformSP platform_sp(target->GetPlatform()); @@ -816,15 +816,14 @@ class CommandObjectTargetVariable : public CommandObjectParsed { return; if (sc.module_sp) { if (sc.comp_unit) { - s.Printf("Global variables for %s in %s:\n", - sc.comp_unit->GetPath().c_str(), - sc.module_sp->GetFileSpec().GetPath().c_str()); + s.Format("Global variables for {0} in {1}:\n", + sc.comp_unit->GetPrimaryFile(), sc.module_sp->GetFileSpec()); } else { s.Printf("Global variables for %s\n", sc.module_sp->GetFileSpec().GetPath().c_str()); } } else if (sc.comp_unit) { - s.Printf("Global variables for %s\n", sc.comp_unit->GetPath().c_str()); + s.Format("Global variables for {0}\n", sc.comp_unit->GetPrimaryFile()); } for (VariableSP var_sp : variable_list) { @@ -926,9 +925,9 @@ class CommandObjectTargetVariable : public CommandObjectParsed { if (!success) { if (frame) { if (comp_unit) - result.AppendErrorWithFormat( - "no global variables in current compile unit: %s\n", - comp_unit->GetPath().c_str()); + result.AppendErrorWithFormatv( - "no global variables in current compile unit: {0}\n", - comp_unit->GetPrimaryFile()); else result.AppendErrorWithFormat( "no debug information for frame %u\n", @@ -1292,7 +1291,7 @@ static void DumpModuleArchitecture(Stream &strm, Module *module, StreamString arch_strm; if (full_triple) - module->GetArchitecture().DumpTriple(arch_strm); + module->GetArchitecture().DumpTriple(arch_strm.AsRawOstream()); else arch_strm.PutCString(module->GetArchitecture().GetArchitectureName()); std::string arch_str = arch_strm.GetString(); @@ -1327,8 +1326,8 @@ static uint32_t DumpCompileUnitLineTable(CommandInterpreter &interpreter, if (i > 0) strm << "\n\n"; - strm << "Line table for " << *static_cast<FileSpec *>(sc.comp_unit) - << " in `" << module->GetFileSpec().GetFilename() << "\n"; + strm << "Line table for " << sc.comp_unit->GetPrimaryFile() << " in `" + << module->GetFileSpec().GetFilename() << "\n"; LineTable *line_table = sc.comp_unit->GetLineTable(); if (line_table) line_table->GetDescription( diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp index c93bd9d5c2323..13c17dfe3cca2 100644 --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -526,7 +526,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed { eCommandProcessMustBeLaunched | eCommandProcessMustBePaused), m_step_type(step_type), m_step_scope(step_scope), m_options(), - m_class_options("scripted step", 'C') { + m_class_options("scripted step") { CommandArgumentEntry arg; CommandArgumentData thread_id_arg; @@ -1193,7 +1193,7 @@ class CommandObjectThreadUntil : public CommandObjectParsed { LineEntry line_entry; const bool exact = false; start_idx_ptr = sc.comp_unit->FindLineEntry( - start_idx_ptr, line_number, sc.comp_unit, exact, &line_entry); + start_idx_ptr, line_number, nullptr, exact, &line_entry); if (start_idx_ptr == UINT32_MAX) break; diff --git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp index 
5683381efc858..92a91cfac2208 100644 --- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp @@ -611,10 +611,10 @@ class CommandObjectWatchpointCommandList : public CommandObjectParsed { const Baton *baton = wp_options->GetBaton(); if (baton) { result.GetOutputStream().Printf("Watchpoint %u:\n", cur_wp_id); - result.GetOutputStream().IndentMore(); - baton->GetDescription(&result.GetOutputStream(), - eDescriptionLevelFull); - result.GetOutputStream().IndentLess(); + baton->GetDescription(result.GetOutputStream().AsRawOstream(), + eDescriptionLevelFull, + result.GetOutputStream().GetIndentLevel() + + 2); } else { result.AppendMessageWithFormat( "Watchpoint %u does not have an associated command.\n", diff --git a/lldb/source/Core/AddressResolverFileLine.cpp b/lldb/source/Core/AddressResolverFileLine.cpp index 4a14260c6c72f..4122b5d3b747d 100644 --- a/lldb/source/Core/AddressResolverFileLine.cpp +++ b/lldb/source/Core/AddressResolverFileLine.cpp @@ -40,14 +40,13 @@ Searcher::CallbackReturn AddressResolverFileLine::SearchCallback(SearchFilter &filter, SymbolContext &context, Address *addr) { SymbolContextList sc_list; - uint32_t sc_list_size; CompileUnit *cu = context.comp_unit; Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_BREAKPOINTS)); - sc_list_size = - cu->ResolveSymbolContext(m_file_spec, m_line_number, m_inlines, false, - eSymbolContextEverything, sc_list); + cu->ResolveSymbolContext(m_file_spec, m_line_number, m_inlines, false, + eSymbolContextEverything, sc_list); + uint32_t sc_list_size = sc_list.GetSize(); for (uint32_t i = 0; i < sc_list_size; i++) { SymbolContext sc; if (sc_list.GetContextAtIndex(i, sc)) { diff --git a/lldb/source/Core/CMakeLists.txt b/lldb/source/Core/CMakeLists.txt index f3ce87ae4f231..a6f7ba8dc25ba 100644 --- a/lldb/source/Core/CMakeLists.txt +++ b/lldb/source/Core/CMakeLists.txt @@ -37,6 +37,7 @@ add_lldb_library(lldbCore FormatEntity.cpp Highlighter.cpp IOHandler.cpp + IOHandlerCursesGUI.cpp Mangled.cpp Module.cpp ModuleChild.cpp diff --git a/lldb/source/Core/FileLineResolver.cpp b/lldb/source/Core/FileLineResolver.cpp index 01df295398a83..7d91d1a3e472c 100644 --- a/lldb/source/Core/FileLineResolver.cpp +++ b/lldb/source/Core/FileLineResolver.cpp @@ -36,8 +36,8 @@ FileLineResolver::SearchCallback(SearchFilter &filter, SymbolContext &context, Address *addr) { CompileUnit *cu = context.comp_unit; - if (m_inlines || - m_file_spec.Compare(*cu, m_file_spec, (bool)m_file_spec.GetDirectory())) { + if (m_inlines || m_file_spec.Compare(cu->GetPrimaryFile(), m_file_spec, + (bool)m_file_spec.GetDirectory())) { uint32_t start_file_idx = 0; uint32_t file_idx = cu->GetSupportFiles().FindFileIndex(start_file_idx, m_file_spec, false); diff --git a/lldb/source/Core/FormatEntity.cpp b/lldb/source/Core/FormatEntity.cpp index c90828f40989c..07ca0a68a10b4 100644 --- a/lldb/source/Core/FormatEntity.cpp +++ b/lldb/source/Core/FormatEntity.cpp @@ -1376,8 +1376,7 @@ bool FormatEntity::Format(const Entry &entry, Stream &s, if (sc) { CompileUnit *cu = sc->comp_unit; if (cu) { - // CompileUnit is a FileSpec - if (DumpFile(s, *cu, (FileKind)entry.number)) + if (DumpFile(s, cu->GetPrimaryFile(), (FileKind)entry.number)) return true; } } diff --git a/lldb/source/Core/IOHandler.cpp b/lldb/source/Core/IOHandler.cpp index d11248094e056..38e65e63d5870 100644 --- a/lldb/source/Core/IOHandler.cpp +++ b/lldb/source/Core/IOHandler.cpp @@ -8,11 +8,6 @@ #include "lldb/Core/IOHandler.h" -#ifndef LLDB_DISABLE_CURSES 
-#include <curses.h> -#include <panel.h> -#endif - -#if defined(__APPLE__) #include <deque> #endif @@ -32,24 +27,6 @@ #endif #include "lldb/Interpreter/CommandCompletions.h" #include "lldb/Interpreter/CommandInterpreter.h" -#ifndef LLDB_DISABLE_CURSES -#include "lldb/Breakpoint/BreakpointLocation.h" -#include "lldb/Core/Module.h" -#include "lldb/Core/ValueObject.h" -#include "lldb/Core/ValueObjectRegister.h" -#include "lldb/Symbol/Block.h" -#include "lldb/Symbol/Function.h" -#include "lldb/Symbol/Symbol.h" -#include "lldb/Symbol/VariableList.h" -#include "lldb/Target/Process.h" -#include "lldb/Target/RegisterContext.h" -#include "lldb/Target/StackFrame.h" -#include "lldb/Target/StopInfo.h" -#include "lldb/Target/Target.h" -#include "lldb/Target/Thread.h" -#include "lldb/Utility/State.h" -#endif - -#include "llvm/ADT/StringRef.h" #ifdef _WIN32 @@ -631,3994 +608,3 @@ void IOHandlerEditline::PrintAsync(Stream *stream, const char *s, size_t len) { #endif } } - -// we may want curses to be disabled for some builds for instance, windows -#ifndef LLDB_DISABLE_CURSES - -#define KEY_RETURN 10 -#define KEY_ESCAPE 27 - -namespace curses { -class Menu; -class MenuDelegate; -class Window; -class WindowDelegate; -typedef std::shared_ptr<Menu> MenuSP; -typedef std::shared_ptr<MenuDelegate> MenuDelegateSP; -typedef std::shared_ptr<Window> WindowSP; -typedef std::shared_ptr<WindowDelegate> WindowDelegateSP; -typedef std::vector<MenuSP> Menus; -typedef std::vector<WindowSP> Windows; -typedef std::vector<WindowDelegateSP> WindowDelegates; - -#if 0 -type summary add -s "x=${var.x}, y=${var.y}" curses::Point -type summary add -s "w=${var.width}, h=${var.height}" curses::Size -type summary add -s "${var.origin%S} ${var.size%S}" curses::Rect -#endif - -struct Point { - int x; - int y; - - Point(int _x = 0, int _y = 0) : x(_x), y(_y) {} - - void Clear() { - x = 0; - y = 0; - } - - Point &operator+=(const Point &rhs) { - x += rhs.x; - y += rhs.y; - return *this; - } - - void Dump() { printf("(x=%i, y=%i)\n", x, y); } -}; - -bool operator==(const Point &lhs, const Point &rhs) { - return lhs.x == rhs.x && lhs.y == rhs.y; -} - -bool operator!=(const Point &lhs, const Point &rhs) { - return lhs.x != rhs.x || lhs.y != rhs.y; -} - -struct Size { - int width; - int height; - Size(int w = 0, int h = 0) : width(w), height(h) {} - - void Clear() { - width = 0; - height = 0; - } - - void Dump() { printf("(w=%i, h=%i)\n", width, height); } -}; - -bool operator==(const Size &lhs, const Size &rhs) { - return lhs.width == rhs.width && lhs.height == rhs.height; -} - -bool operator!=(const Size &lhs, const Size &rhs) { - return lhs.width != rhs.width || lhs.height != rhs.height; -} - -struct Rect { - Point origin; - Size size; - - Rect() : origin(), size() {} - - Rect(const Point &p, const Size &s) : origin(p), size(s) {} - - void Clear() { - origin.Clear(); - size.Clear(); - } - - void Dump() { - printf("(x=%i, y=%i), w=%i, h=%i)\n", origin.x, origin.y, size.width, - size.height); - } - - void Inset(int w, int h) { - if (size.width > w * 2) - size.width -= w * 2; - origin.x += w; - - if (size.height > h * 2) - size.height -= h * 2; - origin.y += h; - } - - // Return a status bar rectangle which is the last line of this rectangle. - // This rectangle will be modified to not include the status bar area. - Rect MakeStatusBar() { - Rect status_bar; - if (size.height > 1) { - status_bar.origin.x = origin.x; - status_bar.origin.y = size.height; - status_bar.size.width = size.width; - status_bar.size.height = 1; - --size.height; - } - return status_bar; - } - - // Return a menubar rectangle which is the first line of this rectangle. 
This - // rectangle will be modified to not include the menubar area. - Rect MakeMenuBar() { - Rect menubar; - if (size.height > 1) { - menubar.origin.x = origin.x; - menubar.origin.y = origin.y; - menubar.size.width = size.width; - menubar.size.height = 1; - ++origin.y; - --size.height; - } - return menubar; - } - - void HorizontalSplitPercentage(float top_percentage, Rect &top, - Rect &bottom) const { - float top_height = top_percentage * size.height; - HorizontalSplit(top_height, top, bottom); - } - - void HorizontalSplit(int top_height, Rect &top, Rect &bottom) const { - top = *this; - if (top_height < size.height) { - top.size.height = top_height; - bottom.origin.x = origin.x; - bottom.origin.y = origin.y + top.size.height; - bottom.size.width = size.width; - bottom.size.height = size.height - top.size.height; - } else { - bottom.Clear(); - } - } - - void VerticalSplitPercentage(float left_percentage, Rect &left, - Rect &right) const { - float left_width = left_percentage * size.width; - VerticalSplit(left_width, left, right); - } - - void VerticalSplit(int left_width, Rect &left, Rect &right) const { - left = *this; - if (left_width < size.width) { - left.size.width = left_width; - right.origin.x = origin.x + left.size.width; - right.origin.y = origin.y; - right.size.width = size.width - left.size.width; - right.size.height = size.height; - } else { - right.Clear(); - } - } -}; - -bool operator==(const Rect &lhs, const Rect &rhs) { - return lhs.origin == rhs.origin && lhs.size == rhs.size; -} - -bool operator!=(const Rect &lhs, const Rect &rhs) { - return lhs.origin != rhs.origin || lhs.size != rhs.size; -} - -enum HandleCharResult { - eKeyNotHandled = 0, - eKeyHandled = 1, - eQuitApplication = 2 -}; - -enum class MenuActionResult { - Handled, - NotHandled, - Quit // Exit all menus and quit -}; - -struct KeyHelp { - int ch; - const char *description; -}; - -class WindowDelegate { -public: - virtual ~WindowDelegate() = default; - - virtual bool WindowDelegateDraw(Window &window, bool force) { - return false; // Drawing not handled - } - - virtual HandleCharResult WindowDelegateHandleChar(Window &window, int key) { - return eKeyNotHandled; - } - - virtual const char *WindowDelegateGetHelpText() { return nullptr; } - - virtual KeyHelp *WindowDelegateGetKeyHelp() { return nullptr; } -}; - -class HelpDialogDelegate : public WindowDelegate { -public: - HelpDialogDelegate(const char *text, KeyHelp *key_help_array); - - ~HelpDialogDelegate() override; - - bool WindowDelegateDraw(Window &window, bool force) override; - - HandleCharResult WindowDelegateHandleChar(Window &window, int key) override; - - size_t GetNumLines() const { return m_text.GetSize(); } - - size_t GetMaxLineLength() const { return m_text.GetMaxStringLength(); } - -protected: - StringList m_text; - int m_first_visible_line; -}; - -class Window { -public: - Window(const char *name) - : m_name(name), m_window(nullptr), m_panel(nullptr), m_parent(nullptr), - m_subwindows(), m_delegate_sp(), m_curr_active_window_idx(UINT32_MAX), - m_prev_active_window_idx(UINT32_MAX), m_delete(false), - m_needs_update(true), m_can_activate(true), m_is_subwin(false) {} - - Window(const char *name, WINDOW *w, bool del = true) - : m_name(name), m_window(nullptr), m_panel(nullptr), m_parent(nullptr), - m_subwindows(), m_delegate_sp(), m_curr_active_window_idx(UINT32_MAX), - m_prev_active_window_idx(UINT32_MAX), m_delete(del), - m_needs_update(true), m_can_activate(true), m_is_subwin(false) { - if (w) - Reset(w); - } - - Window(const char *name, 
const Rect &bounds) - : m_name(name), m_window(nullptr), m_parent(nullptr), m_subwindows(), - m_delegate_sp(), m_curr_active_window_idx(UINT32_MAX), - m_prev_active_window_idx(UINT32_MAX), m_delete(true), - m_needs_update(true), m_can_activate(true), m_is_subwin(false) { - Reset(::newwin(bounds.size.height, bounds.size.width, bounds.origin.y, - bounds.origin.y)); - } - - virtual ~Window() { - RemoveSubWindows(); - Reset(); - } - - void Reset(WINDOW *w = nullptr, bool del = true) { - if (m_window == w) - return; - - if (m_panel) { - ::del_panel(m_panel); - m_panel = nullptr; - } - if (m_window && m_delete) { - ::delwin(m_window); - m_window = nullptr; - m_delete = false; - } - if (w) { - m_window = w; - m_panel = ::new_panel(m_window); - m_delete = del; - } - } - - void AttributeOn(attr_t attr) { ::wattron(m_window, attr); } - void AttributeOff(attr_t attr) { ::wattroff(m_window, attr); } - void Box(chtype v_char = ACS_VLINE, chtype h_char = ACS_HLINE) { - ::box(m_window, v_char, h_char); - } - void Clear() { ::wclear(m_window); } - void Erase() { ::werase(m_window); } - Rect GetBounds() { - return Rect(GetParentOrigin(), GetSize()); - } // Get the rectangle in our parent window - int GetChar() { return ::wgetch(m_window); } - int GetCursorX() { return getcurx(m_window); } - int GetCursorY() { return getcury(m_window); } - Rect GetFrame() { - return Rect(Point(), GetSize()); - } // Get our rectangle in our own coordinate system - Point GetParentOrigin() { return Point(GetParentX(), GetParentY()); } - Size GetSize() { return Size(GetWidth(), GetHeight()); } - int GetParentX() { return getparx(m_window); } - int GetParentY() { return getpary(m_window); } - int GetMaxX() { return getmaxx(m_window); } - int GetMaxY() { return getmaxy(m_window); } - int GetWidth() { return GetMaxX(); } - int GetHeight() { return GetMaxY(); } - void MoveCursor(int x, int y) { ::wmove(m_window, y, x); } - void MoveWindow(int x, int y) { MoveWindow(Point(x, y)); } - void Resize(int w, int h) { ::wresize(m_window, h, w); } - void Resize(const Size &size) { - ::wresize(m_window, size.height, size.width); - } - void PutChar(int ch) { ::waddch(m_window, ch); } - void PutCString(const char *s, int len = -1) { ::waddnstr(m_window, s, len); } - void SetBackground(int color_pair_idx) { - ::wbkgd(m_window, COLOR_PAIR(color_pair_idx)); - } - - void PutCStringTruncated(const char *s, int right_pad) { - int bytes_left = GetWidth() - GetCursorX(); - if (bytes_left > right_pad) { - bytes_left -= right_pad; - ::waddnstr(m_window, s, bytes_left); - } - } - - void MoveWindow(const Point &origin) { - const bool moving_window = origin != GetParentOrigin(); - if (m_is_subwin && moving_window) { - // Can't move subwindows, must delete and re-create - Size size = GetSize(); - Reset(::subwin(m_parent->m_window, size.height, size.width, origin.y, - origin.x), - true); - } else { - ::mvwin(m_window, origin.y, origin.x); - } - } - - void SetBounds(const Rect &bounds) { - const bool moving_window = bounds.origin != GetParentOrigin(); - if (m_is_subwin && moving_window) { - // Can't move subwindows, must delete and re-create - Reset(::subwin(m_parent->m_window, bounds.size.height, bounds.size.width, - bounds.origin.y, bounds.origin.x), - true); - } else { - if (moving_window) - MoveWindow(bounds.origin); - Resize(bounds.size); - } - } - - void Printf(const char *format, ...) 
__attribute__((format(printf, 2, 3))) { - va_list args; - va_start(args, format); - vwprintw(m_window, format, args); - va_end(args); - } - - void Touch() { - ::touchwin(m_window); - if (m_parent) - m_parent->Touch(); - } - - WindowSP CreateSubWindow(const char *name, const Rect &bounds, - bool make_active) { - auto get_window = [this, &bounds]() { - return m_window - ? ::subwin(m_window, bounds.size.height, bounds.size.width, - bounds.origin.y, bounds.origin.x) - : ::newwin(bounds.size.height, bounds.size.width, - bounds.origin.y, bounds.origin.x); - }; - WindowSP subwindow_sp = std::make_shared<Window>(name, get_window(), true); - subwindow_sp->m_is_subwin = subwindow_sp.operator bool(); - subwindow_sp->m_parent = this; - if (make_active) { - m_prev_active_window_idx = m_curr_active_window_idx; - m_curr_active_window_idx = m_subwindows.size(); - } - m_subwindows.push_back(subwindow_sp); - ::top_panel(subwindow_sp->m_panel); - m_needs_update = true; - return subwindow_sp; - } - - bool RemoveSubWindow(Window *window) { - Windows::iterator pos, end = m_subwindows.end(); - size_t i = 0; - for (pos = m_subwindows.begin(); pos != end; ++pos, ++i) { - if ((*pos).get() == window) { - if (m_prev_active_window_idx == i) - m_prev_active_window_idx = UINT32_MAX; - else if (m_prev_active_window_idx != UINT32_MAX && - m_prev_active_window_idx > i) - --m_prev_active_window_idx; - - if (m_curr_active_window_idx == i) - m_curr_active_window_idx = UINT32_MAX; - else if (m_curr_active_window_idx != UINT32_MAX && - m_curr_active_window_idx > i) - --m_curr_active_window_idx; - window->Erase(); - m_subwindows.erase(pos); - m_needs_update = true; - if (m_parent) - m_parent->Touch(); - else - ::touchwin(stdscr); - return true; - } - } - return false; - } - - WindowSP FindSubWindow(const char *name) { - Windows::iterator pos, end = m_subwindows.end(); - size_t i = 0; - for (pos = m_subwindows.begin(); pos != end; ++pos, ++i) { - if ((*pos)->m_name == name) - return *pos; - } - return WindowSP(); - } - - void RemoveSubWindows() { - m_curr_active_window_idx = UINT32_MAX; - m_prev_active_window_idx = UINT32_MAX; - for (Windows::iterator pos = m_subwindows.begin(); - pos != m_subwindows.end(); pos = m_subwindows.erase(pos)) { - (*pos)->Erase(); - } - if (m_parent) - m_parent->Touch(); - else - ::touchwin(stdscr); - } - - WINDOW *get() { return m_window; } - - operator WINDOW *() { return m_window; } - - // Window drawing utilities - void DrawTitleBox(const char *title, const char *bottom_message = nullptr) { - attr_t attr = 0; - if (IsActive()) - attr = A_BOLD | COLOR_PAIR(2); - else - attr = 0; - if (attr) - AttributeOn(attr); - - Box(); - MoveCursor(3, 0); - - if (title && title[0]) { - PutChar('<'); - PutCString(title); - PutChar('>'); - } - - if (bottom_message && bottom_message[0]) { - int bottom_message_length = strlen(bottom_message); - int x = GetWidth() - 3 - (bottom_message_length + 2); - - if (x > 0) { - MoveCursor(x, GetHeight() - 1); - PutChar('['); - PutCString(bottom_message); - PutChar(']'); - } else { - MoveCursor(1, GetHeight() - 1); - PutChar('['); - PutCStringTruncated(bottom_message, 1); - } - } - if (attr) - AttributeOff(attr); - } - - virtual void Draw(bool force) { - if (m_delegate_sp && m_delegate_sp->WindowDelegateDraw(*this, force)) - return; - - for (auto &subwindow_sp : m_subwindows) - subwindow_sp->Draw(force); - } - - bool CreateHelpSubwindow() { - if (m_delegate_sp) { - const char *text = m_delegate_sp->WindowDelegateGetHelpText(); - KeyHelp *key_help = 
m_delegate_sp->WindowDelegateGetKeyHelp(); - if ((text && text[0]) || key_help) { - std::unique_ptr<HelpDialogDelegate> help_delegate_up( - new HelpDialogDelegate(text, key_help)); - const size_t num_lines = help_delegate_up->GetNumLines(); - const size_t max_length = help_delegate_up->GetMaxLineLength(); - Rect bounds = GetBounds(); - bounds.Inset(1, 1); - if (max_length + 4 < static_cast<size_t>(bounds.size.width)) { - bounds.origin.x += (bounds.size.width - max_length + 4) / 2; - bounds.size.width = max_length + 4; - } else { - if (bounds.size.width > 100) { - const int inset_w = bounds.size.width / 4; - bounds.origin.x += inset_w; - bounds.size.width -= 2 * inset_w; - } - } - - if (num_lines + 2 < static_cast<size_t>(bounds.size.height)) { - bounds.origin.y += (bounds.size.height - num_lines + 2) / 2; - bounds.size.height = num_lines + 2; - } else { - if (bounds.size.height > 100) { - const int inset_h = bounds.size.height / 4; - bounds.origin.y += inset_h; - bounds.size.height -= 2 * inset_h; - } - } - WindowSP help_window_sp; - Window *parent_window = GetParent(); - if (parent_window) - help_window_sp = parent_window->CreateSubWindow("Help", bounds, true); - else - help_window_sp = CreateSubWindow("Help", bounds, true); - help_window_sp->SetDelegate( - WindowDelegateSP(help_delegate_up.release())); - return true; - } - } - return false; - } - - virtual HandleCharResult HandleChar(int key) { - // Always check the active window first - HandleCharResult result = eKeyNotHandled; - WindowSP active_window_sp = GetActiveWindow(); - if (active_window_sp) { - result = active_window_sp->HandleChar(key); - if (result != eKeyNotHandled) - return result; - } - - if (m_delegate_sp) { - result = m_delegate_sp->WindowDelegateHandleChar(*this, key); - if (result != eKeyNotHandled) - return result; - } - - // Then check for any windows that want any keys that weren't handled. This - // is typically only for a menubar. Make a copy of the subwindows in case - // any HandleChar() functions muck with the subwindows. If we don't do - // this, we can crash when iterating over the subwindows. 
- Windows subwindows(m_subwindows); - for (auto subwindow_sp : subwindows) { - if (!subwindow_sp->m_can_activate) { - HandleCharResult result = subwindow_sp->HandleChar(key); - if (result != eKeyNotHandled) - return result; - } - } - - return eKeyNotHandled; - } - - WindowSP GetActiveWindow() { - if (!m_subwindows.empty()) { - if (m_curr_active_window_idx >= m_subwindows.size()) { - if (m_prev_active_window_idx < m_subwindows.size()) { - m_curr_active_window_idx = m_prev_active_window_idx; - m_prev_active_window_idx = UINT32_MAX; - } else if (IsActive()) { - m_prev_active_window_idx = UINT32_MAX; - m_curr_active_window_idx = UINT32_MAX; - - // Find first window that wants to be active if this window is active - const size_t num_subwindows = m_subwindows.size(); - for (size_t i = 0; i < num_subwindows; ++i) { - if (m_subwindows[i]->GetCanBeActive()) { - m_curr_active_window_idx = i; - break; - } - } - } - } - - if (m_curr_active_window_idx < m_subwindows.size()) - return m_subwindows[m_curr_active_window_idx]; - } - return WindowSP(); - } - - bool GetCanBeActive() const { return m_can_activate; } - - void SetCanBeActive(bool b) { m_can_activate = b; } - - void SetDelegate(const WindowDelegateSP &delegate_sp) { - m_delegate_sp = delegate_sp; - } - - Window *GetParent() const { return m_parent; } - - bool IsActive() const { - if (m_parent) - return m_parent->GetActiveWindow().get() == this; - else - return true; // Top level window is always active - } - - void SelectNextWindowAsActive() { - // Move active focus to next window - const size_t num_subwindows = m_subwindows.size(); - if (m_curr_active_window_idx == UINT32_MAX) { - uint32_t idx = 0; - for (auto subwindow_sp : m_subwindows) { - if (subwindow_sp->GetCanBeActive()) { - m_curr_active_window_idx = idx; - break; - } - ++idx; - } - } else if (m_curr_active_window_idx + 1 < num_subwindows) { - bool handled = false; - m_prev_active_window_idx = m_curr_active_window_idx; - for (size_t idx = m_curr_active_window_idx + 1; idx < num_subwindows; - ++idx) { - if (m_subwindows[idx]->GetCanBeActive()) { - m_curr_active_window_idx = idx; - handled = true; - break; - } - } - if (!handled) { - for (size_t idx = 0; idx <= m_prev_active_window_idx; ++idx) { - if (m_subwindows[idx]->GetCanBeActive()) { - m_curr_active_window_idx = idx; - break; - } - } - } - } else { - m_prev_active_window_idx = m_curr_active_window_idx; - for (size_t idx = 0; idx < num_subwindows; ++idx) { - if (m_subwindows[idx]->GetCanBeActive()) { - m_curr_active_window_idx = idx; - break; - } - } - } - } - - const char *GetName() const { return m_name.c_str(); } - -protected: - std::string m_name; - WINDOW *m_window; - PANEL *m_panel; - Window *m_parent; - Windows m_subwindows; - WindowDelegateSP m_delegate_sp; - uint32_t m_curr_active_window_idx; - uint32_t m_prev_active_window_idx; - bool m_delete; - bool m_needs_update; - bool m_can_activate; - bool m_is_subwin; - -private: - DISALLOW_COPY_AND_ASSIGN(Window); -}; - -class MenuDelegate { -public: - virtual ~MenuDelegate() = default; - - virtual MenuActionResult MenuDelegateAction(Menu &menu) = 0; -}; - -class Menu : public WindowDelegate { -public: - enum class Type { Invalid, Bar, Item, Separator }; - - // Menubar or separator constructor - Menu(Type type); - - // Menuitem constructor - Menu(const char *name, const char *key_name, int key_value, - uint64_t identifier); - - ~Menu() override = default; - - const MenuDelegateSP &GetDelegate() const { return m_delegate_sp; } - - void SetDelegate(const MenuDelegateSP &delegate_sp) { 
- m_delegate_sp = delegate_sp; - } - - void RecalculateNameLengths(); - - void AddSubmenu(const MenuSP &menu_sp); - - int DrawAndRunMenu(Window &window); - - void DrawMenuTitle(Window &window, bool highlight); - - bool WindowDelegateDraw(Window &window, bool force) override; - - HandleCharResult WindowDelegateHandleChar(Window &window, int key) override; - - MenuActionResult ActionPrivate(Menu &menu) { - MenuActionResult result = MenuActionResult::NotHandled; - if (m_delegate_sp) { - result = m_delegate_sp->MenuDelegateAction(menu); - if (result != MenuActionResult::NotHandled) - return result; - } else if (m_parent) { - result = m_parent->ActionPrivate(menu); - if (result != MenuActionResult::NotHandled) - return result; - } - return m_canned_result; - } - - MenuActionResult Action() { - // Call the recursive action so it can try to handle it with the menu - // delegate, and if not, try our parent menu - return ActionPrivate(*this); - } - - void SetCannedResult(MenuActionResult result) { m_canned_result = result; } - - Menus &GetSubmenus() { return m_submenus; } - - const Menus &GetSubmenus() const { return m_submenus; } - - int GetSelectedSubmenuIndex() const { return m_selected; } - - void SetSelectedSubmenuIndex(int idx) { m_selected = idx; } - - Type GetType() const { return m_type; } - - int GetStartingColumn() const { return m_start_col; } - - void SetStartingColumn(int col) { m_start_col = col; } - - int GetKeyValue() const { return m_key_value; } - - std::string &GetName() { return m_name; } - - int GetDrawWidth() const { - return m_max_submenu_name_length + m_max_submenu_key_name_length + 8; - } - - uint64_t GetIdentifier() const { return m_identifier; } - - void SetIdentifier(uint64_t identifier) { m_identifier = identifier; } - -protected: - std::string m_name; - std::string m_key_name; - uint64_t m_identifier; - Type m_type; - int m_key_value; - int m_start_col; - int m_max_submenu_name_length; - int m_max_submenu_key_name_length; - int m_selected; - Menu *m_parent; - Menus m_submenus; - WindowSP m_menu_window_sp; - MenuActionResult m_canned_result; - MenuDelegateSP m_delegate_sp; -}; - -// Menubar or separator constructor -Menu::Menu(Type type) - : m_name(), m_key_name(), m_identifier(0), m_type(type), m_key_value(0), - m_start_col(0), m_max_submenu_name_length(0), - m_max_submenu_key_name_length(0), m_selected(0), m_parent(nullptr), - m_submenus(), m_canned_result(MenuActionResult::NotHandled), - m_delegate_sp() {} - -// Menuitem constructor -Menu::Menu(const char *name, const char *key_name, int key_value, - uint64_t identifier) - : m_name(), m_key_name(), m_identifier(identifier), m_type(Type::Invalid), - m_key_value(key_value), m_start_col(0), m_max_submenu_name_length(0), - m_max_submenu_key_name_length(0), m_selected(0), m_parent(nullptr), - m_submenus(), m_canned_result(MenuActionResult::NotHandled), - m_delegate_sp() { - if (name && name[0]) { - m_name = name; - m_type = Type::Item; - if (key_name && key_name[0]) - m_key_name = key_name; - } else { - m_type = Type::Separator; - } -} - -void Menu::RecalculateNameLengths() { - m_max_submenu_name_length = 0; - m_max_submenu_key_name_length = 0; - Menus &submenus = GetSubmenus(); - const size_t num_submenus = submenus.size(); - for (size_t i = 0; i < num_submenus; ++i) { - Menu *submenu = submenus[i].get(); - if (static_cast<size_t>(m_max_submenu_name_length) < submenu->m_name.size()) - m_max_submenu_name_length = submenu->m_name.size(); - if (static_cast<size_t>(m_max_submenu_key_name_length) < - submenu->m_key_name.size()) - 
  m_max_submenu_key_name_length = submenu->m_key_name.size();
-  }
-}
-
-void Menu::AddSubmenu(const MenuSP &menu_sp) {
-  menu_sp->m_parent = this;
-  if (static_cast<size_t>(m_max_submenu_name_length) < menu_sp->m_name.size())
-    m_max_submenu_name_length = menu_sp->m_name.size();
-  if (static_cast<size_t>(m_max_submenu_key_name_length) <
-      menu_sp->m_key_name.size())
-    m_max_submenu_key_name_length = menu_sp->m_key_name.size();
-  m_submenus.push_back(menu_sp);
-}
-
-void Menu::DrawMenuTitle(Window &window, bool highlight) {
-  if (m_type == Type::Separator) {
-    window.MoveCursor(0, window.GetCursorY());
-    window.PutChar(ACS_LTEE);
-    int width = window.GetWidth();
-    if (width > 2) {
-      width -= 2;
-      for (int i = 0; i < width; ++i)
-        window.PutChar(ACS_HLINE);
-    }
-    window.PutChar(ACS_RTEE);
-  } else {
-    const int shortcut_key = m_key_value;
-    bool underlined_shortcut = false;
-    const attr_t highlight_attr = A_REVERSE;
-    if (highlight)
-      window.AttributeOn(highlight_attr);
-    if (isprint(shortcut_key)) {
-      size_t lower_pos = m_name.find(tolower(shortcut_key));
-      size_t upper_pos = m_name.find(toupper(shortcut_key));
-      const char *name = m_name.c_str();
-      size_t pos = std::min(lower_pos, upper_pos);
-      if (pos != std::string::npos) {
-        underlined_shortcut = true;
-        if (pos > 0) {
-          window.PutCString(name, pos);
-          name += pos;
-        }
-        const attr_t shortcut_attr = A_UNDERLINE | A_BOLD;
-        window.AttributeOn(shortcut_attr);
-        window.PutChar(name[0]);
-        window.AttributeOff(shortcut_attr);
-        name++;
-        if (name[0])
-          window.PutCString(name);
-      }
-    }
-
-    if (!underlined_shortcut) {
-      window.PutCString(m_name.c_str());
-    }
-
-    if (highlight)
-      window.AttributeOff(highlight_attr);
-
-    if (m_key_name.empty()) {
-      if (!underlined_shortcut && isprint(m_key_value)) {
-        window.AttributeOn(COLOR_PAIR(3));
-        window.Printf(" (%c)", m_key_value);
-        window.AttributeOff(COLOR_PAIR(3));
-      }
-    } else {
-      window.AttributeOn(COLOR_PAIR(3));
-      window.Printf(" (%s)", m_key_name.c_str());
-      window.AttributeOff(COLOR_PAIR(3));
-    }
-  }
-}
-
-bool Menu::WindowDelegateDraw(Window &window, bool force) {
-  Menus &submenus = GetSubmenus();
-  const size_t num_submenus = submenus.size();
-  const int selected_idx = GetSelectedSubmenuIndex();
-  Menu::Type menu_type = GetType();
-  switch (menu_type) {
-  case Menu::Type::Bar: {
-    window.SetBackground(2);
-    window.MoveCursor(0, 0);
-    for (size_t i = 0; i < num_submenus; ++i) {
-      Menu *menu = submenus[i].get();
-      if (i > 0)
-        window.PutChar(' ');
-      menu->SetStartingColumn(window.GetCursorX());
-      window.PutCString("| ");
-      menu->DrawMenuTitle(window, false);
-    }
-    window.PutCString(" |");
-  } break;
-
-  case Menu::Type::Item: {
-    int y = 1;
-    int x = 3;
-    // Draw the menu
-    int cursor_x = 0;
-    int cursor_y = 0;
-    window.Erase();
-    window.SetBackground(2);
-    window.Box();
-    for (size_t i = 0; i < num_submenus; ++i) {
-      const bool is_selected = (i == static_cast<size_t>(selected_idx));
-      window.MoveCursor(x, y + i);
-      if (is_selected) {
-        // Remember where we want the cursor to be
-        cursor_x = x - 1;
-        cursor_y = y + i;
-      }
-      submenus[i]->DrawMenuTitle(window, is_selected);
-    }
-    window.MoveCursor(cursor_x, cursor_y);
-  } break;
-
-  default:
-  case Menu::Type::Separator:
-    break;
-  }
-  return true; // Drawing handled...
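// DrawMenuTitle above reduces to: take the first case-insensitive occurrence
// of the shortcut key in the label and split the label around it. A minimal
// sketch of just that step (PutPlain/PutUnderlined are hypothetical helpers,
// not part of this file, and name is a std::string here):
//
//   size_t pos = std::min(name.find(tolower(key)), name.find(toupper(key)));
//   if (pos == std::string::npos) {
//     PutPlain(name);                  // no shortcut; draw the label as-is
//   } else {
//     PutPlain(name.substr(0, pos));   // text before the shortcut
//     PutUnderlined(name[pos]);        // shortcut char, A_UNDERLINE | A_BOLD
//     PutPlain(name.substr(pos + 1));  // text after the shortcut
//   }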
-} - -HandleCharResult Menu::WindowDelegateHandleChar(Window &window, int key) { - HandleCharResult result = eKeyNotHandled; - - Menus &submenus = GetSubmenus(); - const size_t num_submenus = submenus.size(); - const int selected_idx = GetSelectedSubmenuIndex(); - Menu::Type menu_type = GetType(); - if (menu_type == Menu::Type::Bar) { - MenuSP run_menu_sp; - switch (key) { - case KEY_DOWN: - case KEY_UP: - // Show last menu or first menu - if (selected_idx < static_cast(num_submenus)) - run_menu_sp = submenus[selected_idx]; - else if (!submenus.empty()) - run_menu_sp = submenus.front(); - result = eKeyHandled; - break; - - case KEY_RIGHT: - ++m_selected; - if (m_selected >= static_cast(num_submenus)) - m_selected = 0; - if (m_selected < static_cast(num_submenus)) - run_menu_sp = submenus[m_selected]; - else if (!submenus.empty()) - run_menu_sp = submenus.front(); - result = eKeyHandled; - break; - - case KEY_LEFT: - --m_selected; - if (m_selected < 0) - m_selected = num_submenus - 1; - if (m_selected < static_cast(num_submenus)) - run_menu_sp = submenus[m_selected]; - else if (!submenus.empty()) - run_menu_sp = submenus.front(); - result = eKeyHandled; - break; - - default: - for (size_t i = 0; i < num_submenus; ++i) { - if (submenus[i]->GetKeyValue() == key) { - SetSelectedSubmenuIndex(i); - run_menu_sp = submenus[i]; - result = eKeyHandled; - break; - } - } - break; - } - - if (run_menu_sp) { - // Run the action on this menu in case we need to populate the menu with - // dynamic content and also in case check marks, and any other menu - // decorations need to be calculated - if (run_menu_sp->Action() == MenuActionResult::Quit) - return eQuitApplication; - - Rect menu_bounds; - menu_bounds.origin.x = run_menu_sp->GetStartingColumn(); - menu_bounds.origin.y = 1; - menu_bounds.size.width = run_menu_sp->GetDrawWidth(); - menu_bounds.size.height = run_menu_sp->GetSubmenus().size() + 2; - if (m_menu_window_sp) - window.GetParent()->RemoveSubWindow(m_menu_window_sp.get()); - - m_menu_window_sp = window.GetParent()->CreateSubWindow( - run_menu_sp->GetName().c_str(), menu_bounds, true); - m_menu_window_sp->SetDelegate(run_menu_sp); - } - } else if (menu_type == Menu::Type::Item) { - switch (key) { - case KEY_DOWN: - if (m_submenus.size() > 1) { - const int start_select = m_selected; - while (++m_selected != start_select) { - if (static_cast(m_selected) >= num_submenus) - m_selected = 0; - if (m_submenus[m_selected]->GetType() == Type::Separator) - continue; - else - break; - } - return eKeyHandled; - } - break; - - case KEY_UP: - if (m_submenus.size() > 1) { - const int start_select = m_selected; - while (--m_selected != start_select) { - if (m_selected < static_cast(0)) - m_selected = num_submenus - 1; - if (m_submenus[m_selected]->GetType() == Type::Separator) - continue; - else - break; - } - return eKeyHandled; - } - break; - - case KEY_RETURN: - if (static_cast(selected_idx) < num_submenus) { - if (submenus[selected_idx]->Action() == MenuActionResult::Quit) - return eQuitApplication; - window.GetParent()->RemoveSubWindow(&window); - return eKeyHandled; - } - break; - - case KEY_ESCAPE: // Beware: pressing escape key has 1 to 2 second delay in - // case other chars are entered for escaped sequences - window.GetParent()->RemoveSubWindow(&window); - return eKeyHandled; - - default: - for (size_t i = 0; i < num_submenus; ++i) { - Menu *menu = submenus[i].get(); - if (menu->GetKeyValue() == key) { - SetSelectedSubmenuIndex(i); - window.GetParent()->RemoveSubWindow(&window); - if (menu->Action() 
== MenuActionResult::Quit)
-            return eQuitApplication;
-          return eKeyHandled;
-        }
-      }
-      break;
-    }
-  } else if (menu_type == Menu::Type::Separator) {
-  }
-  return result;
-}
-
-class Application {
-public:
-  Application(FILE *in, FILE *out)
-      : m_window_sp(), m_screen(nullptr), m_in(in), m_out(out) {}
-
-  ~Application() {
-    m_window_delegates.clear();
-    m_window_sp.reset();
-    if (m_screen) {
-      ::delscreen(m_screen);
-      m_screen = nullptr;
-    }
-  }
-
-  void Initialize() {
-    ::setlocale(LC_ALL, "");
-    ::setlocale(LC_CTYPE, "");
-    m_screen = ::newterm(nullptr, m_out, m_in);
-    ::start_color();
-    ::curs_set(0);
-    ::noecho();
-    ::keypad(stdscr, TRUE);
-  }
-
-  void Terminate() { ::endwin(); }
-
-  void Run(Debugger &debugger) {
-    bool done = false;
-    int delay_in_tenths_of_a_second = 1;
-
-    // Alas the threading model in curses is a bit lame, so we need to resort
-    // to polling every tenth of a second. We could poll stdin ourselves and
-    // then pass the keys down, but then we would need to translate all of the
-    // escape sequences ourselves. So we resort to polling for input because
-    // we need to receive async process events while in this loop.
-
-    halfdelay(delay_in_tenths_of_a_second); // Poll using some number of
-                                            // tenths of a second when calling
-                                            // Window::GetChar()
-
-    ListenerSP listener_sp(
-        Listener::MakeListener("lldb.IOHandler.curses.Application"));
-    ConstString broadcaster_class_target(Target::GetStaticBroadcasterClass());
-    ConstString broadcaster_class_process(Process::GetStaticBroadcasterClass());
-    ConstString broadcaster_class_thread(Thread::GetStaticBroadcasterClass());
-    debugger.EnableForwardEvents(listener_sp);
-
-    bool update = true;
-#if defined(__APPLE__)
-    std::deque<int> escape_chars;
-#endif
-
-    while (!done) {
-      if (update) {
-        m_window_sp->Draw(false);
-        // All windows should be calling Window::DeferredRefresh() instead of
-        // Window::Refresh() so we can do a single update and avoid any screen
-        // blinking
-        update_panels();
-
-        // Cursor hiding isn't working on MacOSX, so hide it in the top left
-        // corner
-        m_window_sp->MoveCursor(0, 0);
-
-        doupdate();
-        update = false;
-      }
-
-#if defined(__APPLE__)
-      // Terminal.app doesn't map its function keys correctly, F1-F4 default
-      // to: \033OP, \033OQ, \033OR, \033OS, so let's take care of this here
-      // if possible
-      int ch;
-      if (escape_chars.empty())
-        ch = m_window_sp->GetChar();
-      else {
-        ch = escape_chars.front();
-        escape_chars.pop_front();
-      }
-      if (ch == KEY_ESCAPE) {
-        int ch2 = m_window_sp->GetChar();
-        if (ch2 == 'O') {
-          int ch3 = m_window_sp->GetChar();
-          switch (ch3) {
-          case 'P':
-            ch = KEY_F(1);
-            break;
-          case 'Q':
-            ch = KEY_F(2);
-            break;
-          case 'R':
-            ch = KEY_F(3);
-            break;
-          case 'S':
-            ch = KEY_F(4);
-            break;
-          default:
-            escape_chars.push_back(ch2);
-            if (ch3 != -1)
-              escape_chars.push_back(ch3);
-            break;
-          }
-        } else if (ch2 != -1)
-          escape_chars.push_back(ch2);
-      }
-#else
-      int ch = m_window_sp->GetChar();
-
-#endif
-      if (ch == -1) {
-        if (feof(m_in) || ferror(m_in)) {
-          done = true;
-        } else {
-          // Just a timeout from using halfdelay(), check for events
-          EventSP event_sp;
-          while (listener_sp->PeekAtNextEvent()) {
-            listener_sp->GetEvent(event_sp, std::chrono::seconds(0));
-
-            if (event_sp) {
-              Broadcaster *broadcaster = event_sp->GetBroadcaster();
-              if (broadcaster) {
-                // uint32_t event_type = event_sp->GetType();
-                ConstString broadcaster_class(
-                    broadcaster->GetBroadcasterClass());
-                if (broadcaster_class == broadcaster_class_process) {
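// Process events arriving here are why the loop polls at all: GetChar()
// returns -1 on timeout, and the timeout path drains queued debugger events
// instead of blocking on the keyboard. The bare-curses shape of the loop, as
// a sketch (drain_events/dispatch_key are illustrative placeholders):
//
//   halfdelay(1);              // getch() gives up after 0.1 seconds
//   for (;;) {
//     int ch = getch();
//     if (ch == ERR) {         // timeout, not a key press
//       drain_events();        // poll the async event queue
//       continue;
//     }
//     if (!dispatch_key(ch))   // returns false when the user quits
//       break;
//   }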
-                  debugger.GetCommandInterpreter().UpdateExecutionContext(
-                      nullptr);
-                  update = true;
-                  continue; // Don't get any key, just update our view
-                }
-              }
-            }
-          }
-        }
-      } else {
-        HandleCharResult key_result = m_window_sp->HandleChar(ch);
-        switch (key_result) {
-        case eKeyHandled:
-          debugger.GetCommandInterpreter().UpdateExecutionContext(nullptr);
-          update = true;
-          break;
-        case eKeyNotHandled:
-          break;
-        case eQuitApplication:
-          done = true;
-          break;
-        }
-      }
-    }
-
-    debugger.CancelForwardEvents(listener_sp);
-  }
-
-  WindowSP &GetMainWindow() {
-    if (!m_window_sp)
-      m_window_sp = std::make_shared<Window>("main", stdscr, false);
-    return m_window_sp;
-  }
-
-protected:
-  WindowSP m_window_sp;
-  WindowDelegates m_window_delegates;
-  SCREEN *m_screen;
-  FILE *m_in;
-  FILE *m_out;
-};
-
-} // namespace curses
-
-using namespace curses;
-
-struct Row {
-  ValueObjectManager value;
-  Row *parent;
-  // The process stop ID when the children were calculated.
-  uint32_t children_stop_id;
-  int row_idx;
-  int x;
-  int y;
-  bool might_have_children;
-  bool expanded;
-  bool calculated_children;
-  std::vector<Row> children;
-
-  Row(const ValueObjectSP &v, Row *p)
-      : value(v, lldb::eDynamicDontRunTarget, true), parent(p),
-        children_stop_id(0), row_idx(0), x(1), y(1),
-        might_have_children(v ? v->MightHaveChildren() : false),
-        expanded(false), calculated_children(false), children() {}
-
-  size_t GetDepth() const {
-    if (parent)
-      return 1 + parent->GetDepth();
-    return 0;
-  }
-
-  void Expand() { expanded = true; }
-
-  std::vector<Row> &GetChildren() {
-    ProcessSP process_sp = value.GetProcessSP();
-    // Check the process before asking it for its stop ID.
-    if (process_sp) {
-      auto stop_id = process_sp->GetStopID();
-      if (stop_id != children_stop_id) {
-        children_stop_id = stop_id;
-        calculated_children = false;
-      }
-    }
-    if (!calculated_children) {
-      children.clear();
-      calculated_children = true;
-      ValueObjectSP valobj = value.GetSP();
-      if (valobj) {
-        const size_t num_children = valobj->GetNumChildren();
-        for (size_t i = 0; i < num_children; ++i) {
-          children.push_back(Row(valobj->GetChildAtIndex(i, true), this));
-        }
-      }
-    }
-    return children;
-  }
-
-  void Unexpand() {
-    expanded = false;
-    calculated_children = false;
-    children.clear();
-  }
-
-  void DrawTree(Window &window) {
-    if (parent)
-      parent->DrawTreeForChild(window, this, 0);
-
-    if (might_have_children) {
-      // If we can get UTF8 characters to work we should try to use the
-      // "symbol" UTF8 string below
-      // const char *symbol = "";
-      // if (row.expanded)
-      //   symbol = "\xe2\x96\xbd ";
-      // else
-      //   symbol = "\xe2\x96\xb7 ";
-      // window.PutCString (symbol);
-
-      // The ACS_DARROW and ACS_RARROW don't look very nice; they are just a
-      // 'v' or '>' character...
-      // if (expanded)
-      //   window.PutChar (ACS_DARROW);
-      // else
-      //   window.PutChar (ACS_RARROW);
-      // Since we can't find any good looking right arrow/down arrow symbols,
-      // just use a diamond...
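// With a wide-character curses build (ncursesw) and a UTF-8 locale, the
// triangle glyphs wished for above should work; a sketch, assuming the
// setlocale(LC_ALL, "") call from Application::Initialize() succeeded:
//
//   const char *symbol = expanded
//                            ? "\xe2\x96\xbd " // U+25BD white down triangle
//                            : "\xe2\x96\xb7 "; // U+25B7 white right triangle
//   waddstr(win, symbol); // ncursesw decodes the multibyte string itself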
- window.PutChar(ACS_DIAMOND); - window.PutChar(ACS_HLINE); - } - } - - void DrawTreeForChild(Window &window, Row *child, uint32_t reverse_depth) { - if (parent) - parent->DrawTreeForChild(window, this, reverse_depth + 1); - - if (&GetChildren().back() == child) { - // Last child - if (reverse_depth == 0) { - window.PutChar(ACS_LLCORNER); - window.PutChar(ACS_HLINE); - } else { - window.PutChar(' '); - window.PutChar(' '); - } - } else { - if (reverse_depth == 0) { - window.PutChar(ACS_LTEE); - window.PutChar(ACS_HLINE); - } else { - window.PutChar(ACS_VLINE); - window.PutChar(' '); - } - } - } -}; - -struct DisplayOptions { - bool show_types; -}; - -class TreeItem; - -class TreeDelegate { -public: - TreeDelegate() = default; - virtual ~TreeDelegate() = default; - - virtual void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) = 0; - virtual void TreeDelegateGenerateChildren(TreeItem &item) = 0; - virtual bool TreeDelegateItemSelected( - TreeItem &item) = 0; // Return true if we need to update views -}; - -typedef std::shared_ptr TreeDelegateSP; - -class TreeItem { -public: - TreeItem(TreeItem *parent, TreeDelegate &delegate, bool might_have_children) - : m_parent(parent), m_delegate(delegate), m_user_data(nullptr), - m_identifier(0), m_row_idx(-1), m_children(), - m_might_have_children(might_have_children), m_is_expanded(false) {} - - TreeItem &operator=(const TreeItem &rhs) { - if (this != &rhs) { - m_parent = rhs.m_parent; - m_delegate = rhs.m_delegate; - m_user_data = rhs.m_user_data; - m_identifier = rhs.m_identifier; - m_row_idx = rhs.m_row_idx; - m_children = rhs.m_children; - m_might_have_children = rhs.m_might_have_children; - m_is_expanded = rhs.m_is_expanded; - } - return *this; - } - - size_t GetDepth() const { - if (m_parent) - return 1 + m_parent->GetDepth(); - return 0; - } - - int GetRowIndex() const { return m_row_idx; } - - void ClearChildren() { m_children.clear(); } - - void Resize(size_t n, const TreeItem &t) { m_children.resize(n, t); } - - TreeItem &operator[](size_t i) { return m_children[i]; } - - void SetRowIndex(int row_idx) { m_row_idx = row_idx; } - - size_t GetNumChildren() { - m_delegate.TreeDelegateGenerateChildren(*this); - return m_children.size(); - } - - void ItemWasSelected() { m_delegate.TreeDelegateItemSelected(*this); } - - void CalculateRowIndexes(int &row_idx) { - SetRowIndex(row_idx); - ++row_idx; - - const bool expanded = IsExpanded(); - - // The root item must calculate its children, or we must calculate the - // number of children if the item is expanded - if (m_parent == nullptr || expanded) - GetNumChildren(); - - for (auto &item : m_children) { - if (expanded) - item.CalculateRowIndexes(row_idx); - else - item.SetRowIndex(-1); - } - } - - TreeItem *GetParent() { return m_parent; } - - bool IsExpanded() const { return m_is_expanded; } - - void Expand() { m_is_expanded = true; } - - void Unexpand() { m_is_expanded = false; } - - bool Draw(Window &window, const int first_visible_row, - const uint32_t selected_row_idx, int &row_idx, int &num_rows_left) { - if (num_rows_left <= 0) - return false; - - if (m_row_idx >= first_visible_row) { - window.MoveCursor(2, row_idx + 1); - - if (m_parent) - m_parent->DrawTreeForChild(window, this, 0); - - if (m_might_have_children) { - // It we can get UTF8 characters to work we should try to use the - // "symbol" UTF8 string below - // const char *symbol = ""; - // if (row.expanded) - // symbol = "\xe2\x96\xbd "; - // else - // symbol = "\xe2\x96\xb7 "; - // window.PutCString (symbol); - - // The 
ACS_DARROW and ACS_RARROW don't look very nice they are just a - // 'v' or '>' character... - // if (expanded) - // window.PutChar (ACS_DARROW); - // else - // window.PutChar (ACS_RARROW); - // Since we can't find any good looking right arrow/down arrow symbols, - // just use a diamond... - window.PutChar(ACS_DIAMOND); - window.PutChar(ACS_HLINE); - } - bool highlight = (selected_row_idx == static_cast(m_row_idx)) && - window.IsActive(); - - if (highlight) - window.AttributeOn(A_REVERSE); - - m_delegate.TreeDelegateDrawTreeItem(*this, window); - - if (highlight) - window.AttributeOff(A_REVERSE); - ++row_idx; - --num_rows_left; - } - - if (num_rows_left <= 0) - return false; // We are done drawing... - - if (IsExpanded()) { - for (auto &item : m_children) { - // If we displayed all the rows and item.Draw() returns false we are - // done drawing and can exit this for loop - if (!item.Draw(window, first_visible_row, selected_row_idx, row_idx, - num_rows_left)) - break; - } - } - return num_rows_left >= 0; // Return true if not done drawing yet - } - - void DrawTreeForChild(Window &window, TreeItem *child, - uint32_t reverse_depth) { - if (m_parent) - m_parent->DrawTreeForChild(window, this, reverse_depth + 1); - - if (&m_children.back() == child) { - // Last child - if (reverse_depth == 0) { - window.PutChar(ACS_LLCORNER); - window.PutChar(ACS_HLINE); - } else { - window.PutChar(' '); - window.PutChar(' '); - } - } else { - if (reverse_depth == 0) { - window.PutChar(ACS_LTEE); - window.PutChar(ACS_HLINE); - } else { - window.PutChar(ACS_VLINE); - window.PutChar(' '); - } - } - } - - TreeItem *GetItemForRowIndex(uint32_t row_idx) { - if (static_cast(m_row_idx) == row_idx) - return this; - if (m_children.empty()) - return nullptr; - if (IsExpanded()) { - for (auto &item : m_children) { - TreeItem *selected_item_ptr = item.GetItemForRowIndex(row_idx); - if (selected_item_ptr) - return selected_item_ptr; - } - } - return nullptr; - } - - void *GetUserData() const { return m_user_data; } - - void SetUserData(void *user_data) { m_user_data = user_data; } - - uint64_t GetIdentifier() const { return m_identifier; } - - void SetIdentifier(uint64_t identifier) { m_identifier = identifier; } - - void SetMightHaveChildren(bool b) { m_might_have_children = b; } - -protected: - TreeItem *m_parent; - TreeDelegate &m_delegate; - void *m_user_data; - uint64_t m_identifier; - int m_row_idx; // Zero based visible row index, -1 if not visible or for the - // root item - std::vector m_children; - bool m_might_have_children; - bool m_is_expanded; -}; - -class TreeWindowDelegate : public WindowDelegate { -public: - TreeWindowDelegate(Debugger &debugger, const TreeDelegateSP &delegate_sp) - : m_debugger(debugger), m_delegate_sp(delegate_sp), - m_root(nullptr, *delegate_sp, true), m_selected_item(nullptr), - m_num_rows(0), m_selected_row_idx(0), m_first_visible_row(0), - m_min_x(0), m_min_y(0), m_max_x(0), m_max_y(0) {} - - int NumVisibleRows() const { return m_max_y - m_min_y; } - - bool WindowDelegateDraw(Window &window, bool force) override { - ExecutionContext exe_ctx( - m_debugger.GetCommandInterpreter().GetExecutionContext()); - Process *process = exe_ctx.GetProcessPtr(); - - bool display_content = false; - if (process) { - StateType state = process->GetState(); - if (StateIsStoppedState(state, true)) { - // We are stopped, so it is ok to - display_content = true; - } else if (StateIsRunningState(state)) { - return true; // Don't do any updating when we are running - } - } - - m_min_x = 2; - m_min_y = 1; - 
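// The m_first_visible_row adjustments below implement the usual
// keep-the-selection-visible scroll clamp. The invariant being maintained is
// first_visible <= selected < first_visible + visible_rows:
//
//   if (selected < first_visible)                      // scrolled above view
//     first_visible = selected;
//   else if (first_visible + visible_rows <= selected) // scrolled below view
//     first_visible = selected - visible_rows + 1;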
m_max_x = window.GetWidth() - 1; - m_max_y = window.GetHeight() - 1; - - window.Erase(); - window.DrawTitleBox(window.GetName()); - - if (display_content) { - const int num_visible_rows = NumVisibleRows(); - m_num_rows = 0; - m_root.CalculateRowIndexes(m_num_rows); - - // If we unexpanded while having something selected our total number of - // rows is less than the num visible rows, then make sure we show all the - // rows by setting the first visible row accordingly. - if (m_first_visible_row > 0 && m_num_rows < num_visible_rows) - m_first_visible_row = 0; - - // Make sure the selected row is always visible - if (m_selected_row_idx < m_first_visible_row) - m_first_visible_row = m_selected_row_idx; - else if (m_first_visible_row + num_visible_rows <= m_selected_row_idx) - m_first_visible_row = m_selected_row_idx - num_visible_rows + 1; - - int row_idx = 0; - int num_rows_left = num_visible_rows; - m_root.Draw(window, m_first_visible_row, m_selected_row_idx, row_idx, - num_rows_left); - // Get the selected row - m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx); - } else { - m_selected_item = nullptr; - } - - return true; // Drawing handled - } - - const char *WindowDelegateGetHelpText() override { - return "Thread window keyboard shortcuts:"; - } - - KeyHelp *WindowDelegateGetKeyHelp() override { - static curses::KeyHelp g_source_view_key_help[] = { - {KEY_UP, "Select previous item"}, - {KEY_DOWN, "Select next item"}, - {KEY_RIGHT, "Expand the selected item"}, - {KEY_LEFT, - "Unexpand the selected item or select parent if not expanded"}, - {KEY_PPAGE, "Page up"}, - {KEY_NPAGE, "Page down"}, - {'h', "Show help dialog"}, - {' ', "Toggle item expansion"}, - {',', "Page up"}, - {'.', "Page down"}, - {'\0', nullptr}}; - return g_source_view_key_help; - } - - HandleCharResult WindowDelegateHandleChar(Window &window, int c) override { - switch (c) { - case ',': - case KEY_PPAGE: - // Page up key - if (m_first_visible_row > 0) { - if (m_first_visible_row > m_max_y) - m_first_visible_row -= m_max_y; - else - m_first_visible_row = 0; - m_selected_row_idx = m_first_visible_row; - m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx); - if (m_selected_item) - m_selected_item->ItemWasSelected(); - } - return eKeyHandled; - - case '.': - case KEY_NPAGE: - // Page down key - if (m_num_rows > m_max_y) { - if (m_first_visible_row + m_max_y < m_num_rows) { - m_first_visible_row += m_max_y; - m_selected_row_idx = m_first_visible_row; - m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx); - if (m_selected_item) - m_selected_item->ItemWasSelected(); - } - } - return eKeyHandled; - - case KEY_UP: - if (m_selected_row_idx > 0) { - --m_selected_row_idx; - m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx); - if (m_selected_item) - m_selected_item->ItemWasSelected(); - } - return eKeyHandled; - - case KEY_DOWN: - if (m_selected_row_idx + 1 < m_num_rows) { - ++m_selected_row_idx; - m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx); - if (m_selected_item) - m_selected_item->ItemWasSelected(); - } - return eKeyHandled; - - case KEY_RIGHT: - if (m_selected_item) { - if (!m_selected_item->IsExpanded()) - m_selected_item->Expand(); - } - return eKeyHandled; - - case KEY_LEFT: - if (m_selected_item) { - if (m_selected_item->IsExpanded()) - m_selected_item->Unexpand(); - else if (m_selected_item->GetParent()) { - m_selected_row_idx = m_selected_item->GetParent()->GetRowIndex(); - m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx); - if 
(m_selected_item) - m_selected_item->ItemWasSelected(); - } - } - return eKeyHandled; - - case ' ': - // Toggle expansion state when SPACE is pressed - if (m_selected_item) { - if (m_selected_item->IsExpanded()) - m_selected_item->Unexpand(); - else - m_selected_item->Expand(); - } - return eKeyHandled; - - case 'h': - window.CreateHelpSubwindow(); - return eKeyHandled; - - default: - break; - } - return eKeyNotHandled; - } - -protected: - Debugger &m_debugger; - TreeDelegateSP m_delegate_sp; - TreeItem m_root; - TreeItem *m_selected_item; - int m_num_rows; - int m_selected_row_idx; - int m_first_visible_row; - int m_min_x; - int m_min_y; - int m_max_x; - int m_max_y; -}; - -class FrameTreeDelegate : public TreeDelegate { -public: - FrameTreeDelegate() : TreeDelegate() { - FormatEntity::Parse( - "frame #${frame.index}: {${function.name}${function.pc-offset}}}", - m_format); - } - - ~FrameTreeDelegate() override = default; - - void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) override { - Thread *thread = (Thread *)item.GetUserData(); - if (thread) { - const uint64_t frame_idx = item.GetIdentifier(); - StackFrameSP frame_sp = thread->GetStackFrameAtIndex(frame_idx); - if (frame_sp) { - StreamString strm; - const SymbolContext &sc = - frame_sp->GetSymbolContext(eSymbolContextEverything); - ExecutionContext exe_ctx(frame_sp); - if (FormatEntity::Format(m_format, strm, &sc, &exe_ctx, nullptr, - nullptr, false, false)) { - int right_pad = 1; - window.PutCStringTruncated(strm.GetString().str().c_str(), right_pad); - } - } - } - } - - void TreeDelegateGenerateChildren(TreeItem &item) override { - // No children for frames yet... - } - - bool TreeDelegateItemSelected(TreeItem &item) override { - Thread *thread = (Thread *)item.GetUserData(); - if (thread) { - thread->GetProcess()->GetThreadList().SetSelectedThreadByID( - thread->GetID()); - const uint64_t frame_idx = item.GetIdentifier(); - thread->SetSelectedFrameByIndex(frame_idx); - return true; - } - return false; - } - -protected: - FormatEntity::Entry m_format; -}; - -class ThreadTreeDelegate : public TreeDelegate { -public: - ThreadTreeDelegate(Debugger &debugger) - : TreeDelegate(), m_debugger(debugger), m_tid(LLDB_INVALID_THREAD_ID), - m_stop_id(UINT32_MAX) { - FormatEntity::Parse("thread #${thread.index}: tid = ${thread.id}{, stop " - "reason = ${thread.stop-reason}}", - m_format); - } - - ~ThreadTreeDelegate() override = default; - - ProcessSP GetProcess() { - return m_debugger.GetCommandInterpreter() - .GetExecutionContext() - .GetProcessSP(); - } - - ThreadSP GetThread(const TreeItem &item) { - ProcessSP process_sp = GetProcess(); - if (process_sp) - return process_sp->GetThreadList().FindThreadByID(item.GetIdentifier()); - return ThreadSP(); - } - - void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) override { - ThreadSP thread_sp = GetThread(item); - if (thread_sp) { - StreamString strm; - ExecutionContext exe_ctx(thread_sp); - if (FormatEntity::Format(m_format, strm, nullptr, &exe_ctx, nullptr, - nullptr, false, false)) { - int right_pad = 1; - window.PutCStringTruncated(strm.GetString().str().c_str(), right_pad); - } - } - } - - void TreeDelegateGenerateChildren(TreeItem &item) override { - ProcessSP process_sp = GetProcess(); - if (process_sp && process_sp->IsAlive()) { - StateType state = process_sp->GetState(); - if (StateIsStoppedState(state, true)) { - ThreadSP thread_sp = GetThread(item); - if (thread_sp) { - if (m_stop_id == process_sp->GetStopID() && - thread_sp->GetID() == m_tid) - return; // 
Children are already up to date - if (!m_frame_delegate_sp) { - // Always expand the thread item the first time we show it - m_frame_delegate_sp = std::make_shared(); - } - - m_stop_id = process_sp->GetStopID(); - m_tid = thread_sp->GetID(); - - TreeItem t(&item, *m_frame_delegate_sp, false); - size_t num_frames = thread_sp->GetStackFrameCount(); - item.Resize(num_frames, t); - for (size_t i = 0; i < num_frames; ++i) { - item[i].SetUserData(thread_sp.get()); - item[i].SetIdentifier(i); - } - } - return; - } - } - item.ClearChildren(); - } - - bool TreeDelegateItemSelected(TreeItem &item) override { - ProcessSP process_sp = GetProcess(); - if (process_sp && process_sp->IsAlive()) { - StateType state = process_sp->GetState(); - if (StateIsStoppedState(state, true)) { - ThreadSP thread_sp = GetThread(item); - if (thread_sp) { - ThreadList &thread_list = thread_sp->GetProcess()->GetThreadList(); - std::lock_guard guard(thread_list.GetMutex()); - ThreadSP selected_thread_sp = thread_list.GetSelectedThread(); - if (selected_thread_sp->GetID() != thread_sp->GetID()) { - thread_list.SetSelectedThreadByID(thread_sp->GetID()); - return true; - } - } - } - } - return false; - } - -protected: - Debugger &m_debugger; - std::shared_ptr m_frame_delegate_sp; - lldb::user_id_t m_tid; - uint32_t m_stop_id; - FormatEntity::Entry m_format; -}; - -class ThreadsTreeDelegate : public TreeDelegate { -public: - ThreadsTreeDelegate(Debugger &debugger) - : TreeDelegate(), m_thread_delegate_sp(), m_debugger(debugger), - m_stop_id(UINT32_MAX) { - FormatEntity::Parse("process ${process.id}{, name = ${process.name}}", - m_format); - } - - ~ThreadsTreeDelegate() override = default; - - ProcessSP GetProcess() { - return m_debugger.GetCommandInterpreter() - .GetExecutionContext() - .GetProcessSP(); - } - - void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) override { - ProcessSP process_sp = GetProcess(); - if (process_sp && process_sp->IsAlive()) { - StreamString strm; - ExecutionContext exe_ctx(process_sp); - if (FormatEntity::Format(m_format, strm, nullptr, &exe_ctx, nullptr, - nullptr, false, false)) { - int right_pad = 1; - window.PutCStringTruncated(strm.GetString().str().c_str(), right_pad); - } - } - } - - void TreeDelegateGenerateChildren(TreeItem &item) override { - ProcessSP process_sp = GetProcess(); - if (process_sp && process_sp->IsAlive()) { - StateType state = process_sp->GetState(); - if (StateIsStoppedState(state, true)) { - const uint32_t stop_id = process_sp->GetStopID(); - if (m_stop_id == stop_id) - return; // Children are already up to date - - m_stop_id = stop_id; - - if (!m_thread_delegate_sp) { - // Always expand the thread item the first time we show it - // item.Expand(); - m_thread_delegate_sp = - std::make_shared(m_debugger); - } - - TreeItem t(&item, *m_thread_delegate_sp, false); - ThreadList &threads = process_sp->GetThreadList(); - std::lock_guard guard(threads.GetMutex()); - size_t num_threads = threads.GetSize(); - item.Resize(num_threads, t); - for (size_t i = 0; i < num_threads; ++i) { - item[i].SetIdentifier(threads.GetThreadAtIndex(i)->GetID()); - item[i].SetMightHaveChildren(true); - } - return; - } - } - item.ClearChildren(); - } - - bool TreeDelegateItemSelected(TreeItem &item) override { return false; } - -protected: - std::shared_ptr m_thread_delegate_sp; - Debugger &m_debugger; - uint32_t m_stop_id; - FormatEntity::Entry m_format; -}; - -class ValueObjectListDelegate : public WindowDelegate { -public: - ValueObjectListDelegate() - : m_rows(), m_selected_row(nullptr), 
m_selected_row_idx(0), - m_first_visible_row(0), m_num_rows(0), m_max_x(0), m_max_y(0) {} - - ValueObjectListDelegate(ValueObjectList &valobj_list) - : m_rows(), m_selected_row(nullptr), m_selected_row_idx(0), - m_first_visible_row(0), m_num_rows(0), m_max_x(0), m_max_y(0) { - SetValues(valobj_list); - } - - ~ValueObjectListDelegate() override = default; - - void SetValues(ValueObjectList &valobj_list) { - m_selected_row = nullptr; - m_selected_row_idx = 0; - m_first_visible_row = 0; - m_num_rows = 0; - m_rows.clear(); - for (auto &valobj_sp : valobj_list.GetObjects()) - m_rows.push_back(Row(valobj_sp, nullptr)); - } - - bool WindowDelegateDraw(Window &window, bool force) override { - m_num_rows = 0; - m_min_x = 2; - m_min_y = 1; - m_max_x = window.GetWidth() - 1; - m_max_y = window.GetHeight() - 1; - - window.Erase(); - window.DrawTitleBox(window.GetName()); - - const int num_visible_rows = NumVisibleRows(); - const int num_rows = CalculateTotalNumberRows(m_rows); - - // If we unexpanded while having something selected our total number of - // rows is less than the num visible rows, then make sure we show all the - // rows by setting the first visible row accordingly. - if (m_first_visible_row > 0 && num_rows < num_visible_rows) - m_first_visible_row = 0; - - // Make sure the selected row is always visible - if (m_selected_row_idx < m_first_visible_row) - m_first_visible_row = m_selected_row_idx; - else if (m_first_visible_row + num_visible_rows <= m_selected_row_idx) - m_first_visible_row = m_selected_row_idx - num_visible_rows + 1; - - DisplayRows(window, m_rows, g_options); - - // Get the selected row - m_selected_row = GetRowForRowIndex(m_selected_row_idx); - // Keep the cursor on the selected row so the highlight and the cursor are - // always on the same line - if (m_selected_row) - window.MoveCursor(m_selected_row->x, m_selected_row->y); - - return true; // Drawing handled - } - - KeyHelp *WindowDelegateGetKeyHelp() override { - static curses::KeyHelp g_source_view_key_help[] = { - {KEY_UP, "Select previous item"}, - {KEY_DOWN, "Select next item"}, - {KEY_RIGHT, "Expand selected item"}, - {KEY_LEFT, "Unexpand selected item or select parent if not expanded"}, - {KEY_PPAGE, "Page up"}, - {KEY_NPAGE, "Page down"}, - {'A', "Format as annotated address"}, - {'b', "Format as binary"}, - {'B', "Format as hex bytes with ASCII"}, - {'c', "Format as character"}, - {'d', "Format as a signed integer"}, - {'D', "Format selected value using the default format for the type"}, - {'f', "Format as float"}, - {'h', "Show help dialog"}, - {'i', "Format as instructions"}, - {'o', "Format as octal"}, - {'p', "Format as pointer"}, - {'s', "Format as C string"}, - {'t', "Toggle showing/hiding type names"}, - {'u', "Format as an unsigned integer"}, - {'x', "Format as hex"}, - {'X', "Format as uppercase hex"}, - {' ', "Toggle item expansion"}, - {',', "Page up"}, - {'.', "Page down"}, - {'\0', nullptr}}; - return g_source_view_key_help; - } - - HandleCharResult WindowDelegateHandleChar(Window &window, int c) override { - switch (c) { - case 'x': - case 'X': - case 'o': - case 's': - case 'u': - case 'd': - case 'D': - case 'i': - case 'A': - case 'p': - case 'c': - case 'b': - case 'B': - case 'f': - // Change the format for the currently selected item - if (m_selected_row) { - auto valobj_sp = m_selected_row->value.GetSP(); - if (valobj_sp) - valobj_sp->SetFormat(FormatForChar(c)); - } - return eKeyHandled; - - case 't': - // Toggle showing type names - g_options.show_types = !g_options.show_types; - 
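// Every format key accepted above funnels through FormatForChar() (defined
// further down in this class) into ValueObject::SetFormat(), so a one-letter
// keystroke changes how the selected row is displayed on the next draw:
//
//   if (m_selected_row) {
//     if (auto valobj_sp = m_selected_row->value.GetSP())
//       valobj_sp->SetFormat(FormatForChar(c)); // e.g. 'x' -> eFormatHex
//   }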
return eKeyHandled; - - case ',': - case KEY_PPAGE: - // Page up key - if (m_first_visible_row > 0) { - if (static_cast(m_first_visible_row) > m_max_y) - m_first_visible_row -= m_max_y; - else - m_first_visible_row = 0; - m_selected_row_idx = m_first_visible_row; - } - return eKeyHandled; - - case '.': - case KEY_NPAGE: - // Page down key - if (m_num_rows > static_cast(m_max_y)) { - if (m_first_visible_row + m_max_y < m_num_rows) { - m_first_visible_row += m_max_y; - m_selected_row_idx = m_first_visible_row; - } - } - return eKeyHandled; - - case KEY_UP: - if (m_selected_row_idx > 0) - --m_selected_row_idx; - return eKeyHandled; - - case KEY_DOWN: - if (m_selected_row_idx + 1 < m_num_rows) - ++m_selected_row_idx; - return eKeyHandled; - - case KEY_RIGHT: - if (m_selected_row) { - if (!m_selected_row->expanded) - m_selected_row->Expand(); - } - return eKeyHandled; - - case KEY_LEFT: - if (m_selected_row) { - if (m_selected_row->expanded) - m_selected_row->Unexpand(); - else if (m_selected_row->parent) - m_selected_row_idx = m_selected_row->parent->row_idx; - } - return eKeyHandled; - - case ' ': - // Toggle expansion state when SPACE is pressed - if (m_selected_row) { - if (m_selected_row->expanded) - m_selected_row->Unexpand(); - else - m_selected_row->Expand(); - } - return eKeyHandled; - - case 'h': - window.CreateHelpSubwindow(); - return eKeyHandled; - - default: - break; - } - return eKeyNotHandled; - } - -protected: - std::vector m_rows; - Row *m_selected_row; - uint32_t m_selected_row_idx; - uint32_t m_first_visible_row; - uint32_t m_num_rows; - int m_min_x; - int m_min_y; - int m_max_x; - int m_max_y; - - static Format FormatForChar(int c) { - switch (c) { - case 'x': - return eFormatHex; - case 'X': - return eFormatHexUppercase; - case 'o': - return eFormatOctal; - case 's': - return eFormatCString; - case 'u': - return eFormatUnsigned; - case 'd': - return eFormatDecimal; - case 'D': - return eFormatDefault; - case 'i': - return eFormatInstruction; - case 'A': - return eFormatAddressInfo; - case 'p': - return eFormatPointer; - case 'c': - return eFormatChar; - case 'b': - return eFormatBinary; - case 'B': - return eFormatBytesWithASCII; - case 'f': - return eFormatFloat; - } - return eFormatDefault; - } - - bool DisplayRowObject(Window &window, Row &row, DisplayOptions &options, - bool highlight, bool last_child) { - ValueObject *valobj = row.value.GetSP().get(); - - if (valobj == nullptr) - return false; - - const char *type_name = - options.show_types ? 
valobj->GetTypeName().GetCString() : nullptr;
-    const char *name = valobj->GetName().GetCString();
-    const char *value = valobj->GetValueAsCString();
-    const char *summary = valobj->GetSummaryAsCString();
-
-    window.MoveCursor(row.x, row.y);
-
-    row.DrawTree(window);
-
-    if (highlight)
-      window.AttributeOn(A_REVERSE);
-
-    if (type_name && type_name[0])
-      window.Printf("(%s) ", type_name);
-
-    if (name && name[0])
-      window.PutCString(name);
-
-    attr_t changed_attr = 0;
-    if (valobj->GetValueDidChange())
-      changed_attr = COLOR_PAIR(5) | A_BOLD;
-
-    if (value && value[0]) {
-      window.PutCString(" = ");
-      if (changed_attr)
-        window.AttributeOn(changed_attr);
-      window.PutCString(value);
-      if (changed_attr)
-        window.AttributeOff(changed_attr);
-    }
-
-    if (summary && summary[0]) {
-      window.PutChar(' ');
-      if (changed_attr)
-        window.AttributeOn(changed_attr);
-      window.PutCString(summary);
-      if (changed_attr)
-        window.AttributeOff(changed_attr);
-    }
-
-    if (highlight)
-      window.AttributeOff(A_REVERSE);
-
-    return true;
-  }
-
-  void DisplayRows(Window &window, std::vector<Row> &rows,
-                   DisplayOptions &options) {
-    // > 0x25B7
-    // \/ 0x25BD
-
-    bool window_is_active = window.IsActive();
-    for (auto &row : rows) {
-      const bool last_child = row.parent && &rows[rows.size() - 1] == &row;
-      // Save the row index in each Row structure
-      row.row_idx = m_num_rows;
-      if ((m_num_rows >= m_first_visible_row) &&
-          ((m_num_rows - m_first_visible_row) <
-           static_cast<size_t>(NumVisibleRows()))) {
-        row.x = m_min_x;
-        row.y = m_num_rows - m_first_visible_row + 1;
-        if (DisplayRowObject(window, row, options,
-                             window_is_active &&
-                                 m_num_rows == m_selected_row_idx,
-                             last_child)) {
-          ++m_num_rows;
-        } else {
-          row.x = 0;
-          row.y = 0;
-        }
-      } else {
-        row.x = 0;
-        row.y = 0;
-        ++m_num_rows;
-      }
-
-      auto &children = row.GetChildren();
-      if (row.expanded && !children.empty()) {
-        DisplayRows(window, children, options);
-      }
-    }
-  }
-
-  int CalculateTotalNumberRows(std::vector<Row> &rows) {
-    int row_count = 0;
-    for (auto &row : rows) {
-      ++row_count;
-      if (row.expanded)
-        row_count += CalculateTotalNumberRows(row.GetChildren());
-    }
-    return row_count;
-  }
-
-  static Row *GetRowForRowIndexImpl(std::vector<Row> &rows,
-                                    size_t &row_index) {
-    for (auto &row : rows) {
-      if (row_index == 0)
-        return &row;
-      else {
-        --row_index;
-        auto &children = row.GetChildren();
-        if (row.expanded && !children.empty()) {
-          Row *result = GetRowForRowIndexImpl(children, row_index);
-          if (result)
-            return result;
-        }
-      }
-    }
-    return nullptr;
-  }
-
-  Row *GetRowForRowIndex(size_t row_index) {
-    return GetRowForRowIndexImpl(m_rows, row_index);
-  }
-
-  int NumVisibleRows() const { return m_max_y - m_min_y; }
-
-  static DisplayOptions g_options;
-};
-
-class FrameVariablesWindowDelegate : public ValueObjectListDelegate {
-public:
-  FrameVariablesWindowDelegate(Debugger &debugger)
-      : ValueObjectListDelegate(), m_debugger(debugger),
-        m_frame_block(nullptr) {}
-
-  ~FrameVariablesWindowDelegate() override = default;
-
-  const char *WindowDelegateGetHelpText() override {
-    return "Frame variable window keyboard shortcuts:";
-  }
-
-  bool WindowDelegateDraw(Window &window, bool force) override {
-    ExecutionContext exe_ctx(
-        m_debugger.GetCommandInterpreter().GetExecutionContext());
-    Process *process = exe_ctx.GetProcessPtr();
-    Block *frame_block = nullptr;
-    StackFrame *frame = nullptr;
-
-    if (process) {
-      StateType state = process->GetState();
-      if (StateIsStoppedState(state, true)) {
-        frame = exe_ctx.GetFramePtr();
-        if (frame)
-          frame_block = frame->GetFrameBlock();
-      }
else if (StateIsRunningState(state)) { - return true; // Don't do any updating when we are running - } - } - - ValueObjectList local_values; - if (frame_block) { - // Only update the variables if they have changed - if (m_frame_block != frame_block) { - m_frame_block = frame_block; - - VariableList *locals = frame->GetVariableList(true); - if (locals) { - const DynamicValueType use_dynamic = eDynamicDontRunTarget; - for (const VariableSP &local_sp : *locals) { - ValueObjectSP value_sp = - frame->GetValueObjectForFrameVariable(local_sp, use_dynamic); - if (value_sp) { - ValueObjectSP synthetic_value_sp = value_sp->GetSyntheticValue(); - if (synthetic_value_sp) - local_values.Append(synthetic_value_sp); - else - local_values.Append(value_sp); - } - } - // Update the values - SetValues(local_values); - } - } - } else { - m_frame_block = nullptr; - // Update the values with an empty list if there is no frame - SetValues(local_values); - } - - return ValueObjectListDelegate::WindowDelegateDraw(window, force); - } - -protected: - Debugger &m_debugger; - Block *m_frame_block; -}; - -class RegistersWindowDelegate : public ValueObjectListDelegate { -public: - RegistersWindowDelegate(Debugger &debugger) - : ValueObjectListDelegate(), m_debugger(debugger) {} - - ~RegistersWindowDelegate() override = default; - - const char *WindowDelegateGetHelpText() override { - return "Register window keyboard shortcuts:"; - } - - bool WindowDelegateDraw(Window &window, bool force) override { - ExecutionContext exe_ctx( - m_debugger.GetCommandInterpreter().GetExecutionContext()); - StackFrame *frame = exe_ctx.GetFramePtr(); - - ValueObjectList value_list; - if (frame) { - if (frame->GetStackID() != m_stack_id) { - m_stack_id = frame->GetStackID(); - RegisterContextSP reg_ctx(frame->GetRegisterContext()); - if (reg_ctx) { - const uint32_t num_sets = reg_ctx->GetRegisterSetCount(); - for (uint32_t set_idx = 0; set_idx < num_sets; ++set_idx) { - value_list.Append( - ValueObjectRegisterSet::Create(frame, reg_ctx, set_idx)); - } - } - SetValues(value_list); - } - } else { - Process *process = exe_ctx.GetProcessPtr(); - if (process && process->IsAlive()) - return true; // Don't do any updating if we are running - else { - // Update the values with an empty list if there is no process or the - // process isn't alive anymore - SetValues(value_list); - } - } - return ValueObjectListDelegate::WindowDelegateDraw(window, force); - } - -protected: - Debugger &m_debugger; - StackID m_stack_id; -}; - -static const char *CursesKeyToCString(int ch) { - static char g_desc[32]; - if (ch >= KEY_F0 && ch < KEY_F0 + 64) { - snprintf(g_desc, sizeof(g_desc), "F%u", ch - KEY_F0); - return g_desc; - } - switch (ch) { - case KEY_DOWN: - return "down"; - case KEY_UP: - return "up"; - case KEY_LEFT: - return "left"; - case KEY_RIGHT: - return "right"; - case KEY_HOME: - return "home"; - case KEY_BACKSPACE: - return "backspace"; - case KEY_DL: - return "delete-line"; - case KEY_IL: - return "insert-line"; - case KEY_DC: - return "delete-char"; - case KEY_IC: - return "insert-char"; - case KEY_CLEAR: - return "clear"; - case KEY_EOS: - return "clear-to-eos"; - case KEY_EOL: - return "clear-to-eol"; - case KEY_SF: - return "scroll-forward"; - case KEY_SR: - return "scroll-backward"; - case KEY_NPAGE: - return "page-down"; - case KEY_PPAGE: - return "page-up"; - case KEY_STAB: - return "set-tab"; - case KEY_CTAB: - return "clear-tab"; - case KEY_CATAB: - return "clear-all-tabs"; - case KEY_ENTER: - return "enter"; - case KEY_PRINT: - return 
"print"; - case KEY_LL: - return "lower-left key"; - case KEY_A1: - return "upper left of keypad"; - case KEY_A3: - return "upper right of keypad"; - case KEY_B2: - return "center of keypad"; - case KEY_C1: - return "lower left of keypad"; - case KEY_C3: - return "lower right of keypad"; - case KEY_BTAB: - return "back-tab key"; - case KEY_BEG: - return "begin key"; - case KEY_CANCEL: - return "cancel key"; - case KEY_CLOSE: - return "close key"; - case KEY_COMMAND: - return "command key"; - case KEY_COPY: - return "copy key"; - case KEY_CREATE: - return "create key"; - case KEY_END: - return "end key"; - case KEY_EXIT: - return "exit key"; - case KEY_FIND: - return "find key"; - case KEY_HELP: - return "help key"; - case KEY_MARK: - return "mark key"; - case KEY_MESSAGE: - return "message key"; - case KEY_MOVE: - return "move key"; - case KEY_NEXT: - return "next key"; - case KEY_OPEN: - return "open key"; - case KEY_OPTIONS: - return "options key"; - case KEY_PREVIOUS: - return "previous key"; - case KEY_REDO: - return "redo key"; - case KEY_REFERENCE: - return "reference key"; - case KEY_REFRESH: - return "refresh key"; - case KEY_REPLACE: - return "replace key"; - case KEY_RESTART: - return "restart key"; - case KEY_RESUME: - return "resume key"; - case KEY_SAVE: - return "save key"; - case KEY_SBEG: - return "shifted begin key"; - case KEY_SCANCEL: - return "shifted cancel key"; - case KEY_SCOMMAND: - return "shifted command key"; - case KEY_SCOPY: - return "shifted copy key"; - case KEY_SCREATE: - return "shifted create key"; - case KEY_SDC: - return "shifted delete-character key"; - case KEY_SDL: - return "shifted delete-line key"; - case KEY_SELECT: - return "select key"; - case KEY_SEND: - return "shifted end key"; - case KEY_SEOL: - return "shifted clear-to-end-of-line key"; - case KEY_SEXIT: - return "shifted exit key"; - case KEY_SFIND: - return "shifted find key"; - case KEY_SHELP: - return "shifted help key"; - case KEY_SHOME: - return "shifted home key"; - case KEY_SIC: - return "shifted insert-character key"; - case KEY_SLEFT: - return "shifted left-arrow key"; - case KEY_SMESSAGE: - return "shifted message key"; - case KEY_SMOVE: - return "shifted move key"; - case KEY_SNEXT: - return "shifted next key"; - case KEY_SOPTIONS: - return "shifted options key"; - case KEY_SPREVIOUS: - return "shifted previous key"; - case KEY_SPRINT: - return "shifted print key"; - case KEY_SREDO: - return "shifted redo key"; - case KEY_SREPLACE: - return "shifted replace key"; - case KEY_SRIGHT: - return "shifted right-arrow key"; - case KEY_SRSUME: - return "shifted resume key"; - case KEY_SSAVE: - return "shifted save key"; - case KEY_SSUSPEND: - return "shifted suspend key"; - case KEY_SUNDO: - return "shifted undo key"; - case KEY_SUSPEND: - return "suspend key"; - case KEY_UNDO: - return "undo key"; - case KEY_MOUSE: - return "Mouse event has occurred"; - case KEY_RESIZE: - return "Terminal resize event"; -#ifdef KEY_EVENT - case KEY_EVENT: - return "We were interrupted by an event"; -#endif - case KEY_RETURN: - return "return"; - case ' ': - return "space"; - case '\t': - return "tab"; - case KEY_ESCAPE: - return "escape"; - default: - if (isprint(ch)) - snprintf(g_desc, sizeof(g_desc), "%c", ch); - else - snprintf(g_desc, sizeof(g_desc), "\\x%2.2x", ch); - return g_desc; - } - return nullptr; -} - -HelpDialogDelegate::HelpDialogDelegate(const char *text, - KeyHelp *key_help_array) - : m_text(), m_first_visible_line(0) { - if (text && text[0]) { - m_text.SplitIntoLines(text); - 
m_text.AppendString(""); - } - if (key_help_array) { - for (KeyHelp *key = key_help_array; key->ch; ++key) { - StreamString key_description; - key_description.Printf("%10s - %s", CursesKeyToCString(key->ch), - key->description); - m_text.AppendString(key_description.GetString()); - } - } -} - -HelpDialogDelegate::~HelpDialogDelegate() = default; - -bool HelpDialogDelegate::WindowDelegateDraw(Window &window, bool force) { - window.Erase(); - const int window_height = window.GetHeight(); - int x = 2; - int y = 1; - const int min_y = y; - const int max_y = window_height - 1 - y; - const size_t num_visible_lines = max_y - min_y + 1; - const size_t num_lines = m_text.GetSize(); - const char *bottom_message; - if (num_lines <= num_visible_lines) - bottom_message = "Press any key to exit"; - else - bottom_message = "Use arrows to scroll, any other key to exit"; - window.DrawTitleBox(window.GetName(), bottom_message); - while (y <= max_y) { - window.MoveCursor(x, y); - window.PutCStringTruncated( - m_text.GetStringAtIndex(m_first_visible_line + y - min_y), 1); - ++y; - } - return true; -} - -HandleCharResult HelpDialogDelegate::WindowDelegateHandleChar(Window &window, - int key) { - bool done = false; - const size_t num_lines = m_text.GetSize(); - const size_t num_visible_lines = window.GetHeight() - 2; - - if (num_lines <= num_visible_lines) { - done = true; - // If we have all lines visible and don't need scrolling, then any key - // press will cause us to exit - } else { - switch (key) { - case KEY_UP: - if (m_first_visible_line > 0) - --m_first_visible_line; - break; - - case KEY_DOWN: - if (m_first_visible_line + num_visible_lines < num_lines) - ++m_first_visible_line; - break; - - case KEY_PPAGE: - case ',': - if (m_first_visible_line > 0) { - if (static_cast(m_first_visible_line) >= num_visible_lines) - m_first_visible_line -= num_visible_lines; - else - m_first_visible_line = 0; - } - break; - - case KEY_NPAGE: - case '.': - if (m_first_visible_line + num_visible_lines < num_lines) { - m_first_visible_line += num_visible_lines; - if (static_cast(m_first_visible_line) > num_lines) - m_first_visible_line = num_lines - num_visible_lines; - } - break; - - default: - done = true; - break; - } - } - if (done) - window.GetParent()->RemoveSubWindow(&window); - return eKeyHandled; -} - -class ApplicationDelegate : public WindowDelegate, public MenuDelegate { -public: - enum { - eMenuID_LLDB = 1, - eMenuID_LLDBAbout, - eMenuID_LLDBExit, - - eMenuID_Target, - eMenuID_TargetCreate, - eMenuID_TargetDelete, - - eMenuID_Process, - eMenuID_ProcessAttach, - eMenuID_ProcessDetach, - eMenuID_ProcessLaunch, - eMenuID_ProcessContinue, - eMenuID_ProcessHalt, - eMenuID_ProcessKill, - - eMenuID_Thread, - eMenuID_ThreadStepIn, - eMenuID_ThreadStepOver, - eMenuID_ThreadStepOut, - - eMenuID_View, - eMenuID_ViewBacktrace, - eMenuID_ViewRegisters, - eMenuID_ViewSource, - eMenuID_ViewVariables, - - eMenuID_Help, - eMenuID_HelpGUIHelp - }; - - ApplicationDelegate(Application &app, Debugger &debugger) - : WindowDelegate(), MenuDelegate(), m_app(app), m_debugger(debugger) {} - - ~ApplicationDelegate() override = default; - - bool WindowDelegateDraw(Window &window, bool force) override { - return false; // Drawing not handled, let standard window drawing happen - } - - HandleCharResult WindowDelegateHandleChar(Window &window, int key) override { - switch (key) { - case '\t': - window.SelectNextWindowAsActive(); - return eKeyHandled; - - case 'h': - window.CreateHelpSubwindow(); - return eKeyHandled; - - case KEY_ESCAPE: - 
return eQuitApplication; - - default: - break; - } - return eKeyNotHandled; - } - - const char *WindowDelegateGetHelpText() override { - return "Welcome to the LLDB curses GUI.\n\n" - "Press the TAB key to change the selected view.\n" - "Each view has its own keyboard shortcuts, press 'h' to open a " - "dialog to display them.\n\n" - "Common key bindings for all views:"; - } - - KeyHelp *WindowDelegateGetKeyHelp() override { - static curses::KeyHelp g_source_view_key_help[] = { - {'\t', "Select next view"}, - {'h', "Show help dialog with view specific key bindings"}, - {',', "Page up"}, - {'.', "Page down"}, - {KEY_UP, "Select previous"}, - {KEY_DOWN, "Select next"}, - {KEY_LEFT, "Unexpand or select parent"}, - {KEY_RIGHT, "Expand"}, - {KEY_PPAGE, "Page up"}, - {KEY_NPAGE, "Page down"}, - {'\0', nullptr}}; - return g_source_view_key_help; - } - - MenuActionResult MenuDelegateAction(Menu &menu) override { - switch (menu.GetIdentifier()) { - case eMenuID_ThreadStepIn: { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasThreadScope()) { - Process *process = exe_ctx.GetProcessPtr(); - if (process && process->IsAlive() && - StateIsStoppedState(process->GetState(), true)) - exe_ctx.GetThreadRef().StepIn(true); - } - } - return MenuActionResult::Handled; - - case eMenuID_ThreadStepOut: { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasThreadScope()) { - Process *process = exe_ctx.GetProcessPtr(); - if (process && process->IsAlive() && - StateIsStoppedState(process->GetState(), true)) - exe_ctx.GetThreadRef().StepOut(); - } - } - return MenuActionResult::Handled; - - case eMenuID_ThreadStepOver: { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasThreadScope()) { - Process *process = exe_ctx.GetProcessPtr(); - if (process && process->IsAlive() && - StateIsStoppedState(process->GetState(), true)) - exe_ctx.GetThreadRef().StepOver(true); - } - } - return MenuActionResult::Handled; - - case eMenuID_ProcessContinue: { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasProcessScope()) { - Process *process = exe_ctx.GetProcessPtr(); - if (process && process->IsAlive() && - StateIsStoppedState(process->GetState(), true)) - process->Resume(); - } - } - return MenuActionResult::Handled; - - case eMenuID_ProcessKill: { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasProcessScope()) { - Process *process = exe_ctx.GetProcessPtr(); - if (process && process->IsAlive()) - process->Destroy(false); - } - } - return MenuActionResult::Handled; - - case eMenuID_ProcessHalt: { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasProcessScope()) { - Process *process = exe_ctx.GetProcessPtr(); - if (process && process->IsAlive()) - process->Halt(); - } - } - return MenuActionResult::Handled; - - case eMenuID_ProcessDetach: { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasProcessScope()) { - Process *process = exe_ctx.GetProcessPtr(); - if (process && process->IsAlive()) - process->Detach(false); - } - } - return MenuActionResult::Handled; - - case eMenuID_Process: { - // Populate the menu with all of the threads if the process is stopped - // when the Process menu gets selected and is about to display its - // submenu. 
- Menus &submenus = menu.GetSubmenus(); - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - Process *process = exe_ctx.GetProcessPtr(); - if (process && process->IsAlive() && - StateIsStoppedState(process->GetState(), true)) { - if (submenus.size() == 7) - menu.AddSubmenu(MenuSP(new Menu(Menu::Type::Separator))); - else if (submenus.size() > 8) - submenus.erase(submenus.begin() + 8, submenus.end()); - - ThreadList &threads = process->GetThreadList(); - std::lock_guard guard(threads.GetMutex()); - size_t num_threads = threads.GetSize(); - for (size_t i = 0; i < num_threads; ++i) { - ThreadSP thread_sp = threads.GetThreadAtIndex(i); - char menu_char = '\0'; - if (i < 9) - menu_char = '1' + i; - StreamString thread_menu_title; - thread_menu_title.Printf("Thread %u", thread_sp->GetIndexID()); - const char *thread_name = thread_sp->GetName(); - if (thread_name && thread_name[0]) - thread_menu_title.Printf(" %s", thread_name); - else { - const char *queue_name = thread_sp->GetQueueName(); - if (queue_name && queue_name[0]) - thread_menu_title.Printf(" %s", queue_name); - } - menu.AddSubmenu( - MenuSP(new Menu(thread_menu_title.GetString().str().c_str(), - nullptr, menu_char, thread_sp->GetID()))); - } - } else if (submenus.size() > 7) { - // Remove the separator and any other thread submenu items that were - // previously added - submenus.erase(submenus.begin() + 7, submenus.end()); - } - // Since we are adding and removing items we need to recalculate the name - // lengths - menu.RecalculateNameLengths(); - } - return MenuActionResult::Handled; - - case eMenuID_ViewVariables: { - WindowSP main_window_sp = m_app.GetMainWindow(); - WindowSP source_window_sp = main_window_sp->FindSubWindow("Source"); - WindowSP variables_window_sp = main_window_sp->FindSubWindow("Variables"); - WindowSP registers_window_sp = main_window_sp->FindSubWindow("Registers"); - const Rect source_bounds = source_window_sp->GetBounds(); - - if (variables_window_sp) { - const Rect variables_bounds = variables_window_sp->GetBounds(); - - main_window_sp->RemoveSubWindow(variables_window_sp.get()); - - if (registers_window_sp) { - // We have a registers window, so give all the area back to the - // registers window - Rect registers_bounds = variables_bounds; - registers_bounds.size.width = source_bounds.size.width; - registers_window_sp->SetBounds(registers_bounds); - } else { - // We have no registers window showing so give the bottom area back - // to the source view - source_window_sp->Resize(source_bounds.size.width, - source_bounds.size.height + - variables_bounds.size.height); - } - } else { - Rect new_variables_rect; - if (registers_window_sp) { - // We have a registers window so split the area of the registers - // window into two columns where the left hand side will be the - // variables and the right hand side will be the registers - const Rect variables_bounds = registers_window_sp->GetBounds(); - Rect new_registers_rect; - variables_bounds.VerticalSplitPercentage(0.50, new_variables_rect, - new_registers_rect); - registers_window_sp->SetBounds(new_registers_rect); - } else { - // No variables window, grab the bottom part of the source window - Rect new_source_rect; - source_bounds.HorizontalSplitPercentage(0.70, new_source_rect, - new_variables_rect); - source_window_sp->SetBounds(new_source_rect); - } - WindowSP new_window_sp = main_window_sp->CreateSubWindow( - "Variables", new_variables_rect, false); - new_window_sp->SetDelegate( - WindowDelegateSP(new 
FrameVariablesWindowDelegate(m_debugger))); - } - touchwin(stdscr); - } - return MenuActionResult::Handled; - - case eMenuID_ViewRegisters: { - WindowSP main_window_sp = m_app.GetMainWindow(); - WindowSP source_window_sp = main_window_sp->FindSubWindow("Source"); - WindowSP variables_window_sp = main_window_sp->FindSubWindow("Variables"); - WindowSP registers_window_sp = main_window_sp->FindSubWindow("Registers"); - const Rect source_bounds = source_window_sp->GetBounds(); - - if (registers_window_sp) { - if (variables_window_sp) { - const Rect variables_bounds = variables_window_sp->GetBounds(); - - // We have a variables window, so give all the area back to the - // variables window - variables_window_sp->Resize(variables_bounds.size.width + - registers_window_sp->GetWidth(), - variables_bounds.size.height); - } else { - // We have no variables window showing so give the bottom area back - // to the source view - source_window_sp->Resize(source_bounds.size.width, - source_bounds.size.height + - registers_window_sp->GetHeight()); - } - main_window_sp->RemoveSubWindow(registers_window_sp.get()); - } else { - Rect new_regs_rect; - if (variables_window_sp) { - // We have a variables window, split it into two columns where the - // left hand side will be the variables and the right hand side will - // be the registers - const Rect variables_bounds = variables_window_sp->GetBounds(); - Rect new_vars_rect; - variables_bounds.VerticalSplitPercentage(0.50, new_vars_rect, - new_regs_rect); - variables_window_sp->SetBounds(new_vars_rect); - } else { - // No registers window, grab the bottom part of the source window - Rect new_source_rect; - source_bounds.HorizontalSplitPercentage(0.70, new_source_rect, - new_regs_rect); - source_window_sp->SetBounds(new_source_rect); - } - WindowSP new_window_sp = - main_window_sp->CreateSubWindow("Registers", new_regs_rect, false); - new_window_sp->SetDelegate( - WindowDelegateSP(new RegistersWindowDelegate(m_debugger))); - } - touchwin(stdscr); - } - return MenuActionResult::Handled; - - case eMenuID_HelpGUIHelp: - m_app.GetMainWindow()->CreateHelpSubwindow(); - return MenuActionResult::Handled; - - default: - break; - } - - return MenuActionResult::NotHandled; - } - -protected: - Application &m_app; - Debugger &m_debugger; -}; - -class StatusBarWindowDelegate : public WindowDelegate { -public: - StatusBarWindowDelegate(Debugger &debugger) : m_debugger(debugger) { - FormatEntity::Parse("Thread: ${thread.id%tid}", m_format); - } - - ~StatusBarWindowDelegate() override = default; - - bool WindowDelegateDraw(Window &window, bool force) override { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - Process *process = exe_ctx.GetProcessPtr(); - Thread *thread = exe_ctx.GetThreadPtr(); - StackFrame *frame = exe_ctx.GetFramePtr(); - window.Erase(); - window.SetBackground(2); - window.MoveCursor(0, 0); - if (process) { - const StateType state = process->GetState(); - window.Printf("Process: %5" PRIu64 " %10s", process->GetID(), - StateAsCString(state)); - - if (StateIsStoppedState(state, true)) { - StreamString strm; - if (thread && FormatEntity::Format(m_format, strm, nullptr, &exe_ctx, - nullptr, nullptr, false, false)) { - window.MoveCursor(40, 0); - window.PutCStringTruncated(strm.GetString().str().c_str(), 1); - } - - window.MoveCursor(60, 0); - if (frame) - window.Printf("Frame: %3u PC = 0x%16.16" PRIx64, - frame->GetFrameIndex(), - frame->GetFrameCodeAddress().GetOpcodeLoadAddress( - exe_ctx.GetTargetPtr())); - } else if 
(state == eStateExited) { - const char *exit_desc = process->GetExitDescription(); - const int exit_status = process->GetExitStatus(); - if (exit_desc && exit_desc[0]) - window.Printf(" with status = %i (%s)", exit_status, exit_desc); - else - window.Printf(" with status = %i", exit_status); - } - } - return true; - } - -protected: - Debugger &m_debugger; - FormatEntity::Entry m_format; -}; - -class SourceFileWindowDelegate : public WindowDelegate { -public: - SourceFileWindowDelegate(Debugger &debugger) - : WindowDelegate(), m_debugger(debugger), m_sc(), m_file_sp(), - m_disassembly_scope(nullptr), m_disassembly_sp(), m_disassembly_range(), - m_title(), m_line_width(4), m_selected_line(0), m_pc_line(0), - m_stop_id(0), m_frame_idx(UINT32_MAX), m_first_visible_line(0), - m_min_x(0), m_min_y(0), m_max_x(0), m_max_y(0) {} - - ~SourceFileWindowDelegate() override = default; - - void Update(const SymbolContext &sc) { m_sc = sc; } - - uint32_t NumVisibleLines() const { return m_max_y - m_min_y; } - - const char *WindowDelegateGetHelpText() override { - return "Source/Disassembly window keyboard shortcuts:"; - } - - KeyHelp *WindowDelegateGetKeyHelp() override { - static curses::KeyHelp g_source_view_key_help[] = { - {KEY_RETURN, "Run to selected line with one shot breakpoint"}, - {KEY_UP, "Select previous source line"}, - {KEY_DOWN, "Select next source line"}, - {KEY_PPAGE, "Page up"}, - {KEY_NPAGE, "Page down"}, - {'b', "Set breakpoint on selected source/disassembly line"}, - {'c', "Continue process"}, - {'d', "Detach and resume process"}, - {'D', "Detach with process suspended"}, - {'h', "Show help dialog"}, - {'k', "Kill process"}, - {'n', "Step over (source line)"}, - {'N', "Step over (single instruction)"}, - {'o', "Step out"}, - {'s', "Step in (source line)"}, - {'S', "Step in (single instruction)"}, - {',', "Page up"}, - {'.', "Page down"}, - {'\0', nullptr}}; - return g_source_view_key_help; - } - - bool WindowDelegateDraw(Window &window, bool force) override { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - Process *process = exe_ctx.GetProcessPtr(); - Thread *thread = nullptr; - - bool update_location = false; - if (process) { - StateType state = process->GetState(); - if (StateIsStoppedState(state, true)) { - // We are stopped, so it is ok to - update_location = true; - } - } - - m_min_x = 1; - m_min_y = 2; - m_max_x = window.GetMaxX() - 1; - m_max_y = window.GetMaxY() - 1; - - const uint32_t num_visible_lines = NumVisibleLines(); - StackFrameSP frame_sp; - bool set_selected_line_to_pc = false; - - if (update_location) { - const bool process_alive = process ? process->IsAlive() : false; - bool thread_changed = false; - if (process_alive) { - thread = exe_ctx.GetThreadPtr(); - if (thread) { - frame_sp = thread->GetSelectedFrame(); - auto tid = thread->GetID(); - thread_changed = tid != m_tid; - m_tid = tid; - } else { - if (m_tid != LLDB_INVALID_THREAD_ID) { - thread_changed = true; - m_tid = LLDB_INVALID_THREAD_ID; - } - } - } - const uint32_t stop_id = process ? 
process->GetStopID() : 0; - const bool stop_id_changed = stop_id != m_stop_id; - bool frame_changed = false; - m_stop_id = stop_id; - m_title.Clear(); - if (frame_sp) { - m_sc = frame_sp->GetSymbolContext(eSymbolContextEverything); - if (m_sc.module_sp) { - m_title.Printf( - "%s", m_sc.module_sp->GetFileSpec().GetFilename().GetCString()); - ConstString func_name = m_sc.GetFunctionName(); - if (func_name) - m_title.Printf("`%s", func_name.GetCString()); - } - const uint32_t frame_idx = frame_sp->GetFrameIndex(); - frame_changed = frame_idx != m_frame_idx; - m_frame_idx = frame_idx; - } else { - m_sc.Clear(true); - frame_changed = m_frame_idx != UINT32_MAX; - m_frame_idx = UINT32_MAX; - } - - const bool context_changed = - thread_changed || frame_changed || stop_id_changed; - - if (process_alive) { - if (m_sc.line_entry.IsValid()) { - m_pc_line = m_sc.line_entry.line; - if (m_pc_line != UINT32_MAX) - --m_pc_line; // Convert to zero based line number... - // Update the selected line if the stop ID changed... - if (context_changed) - m_selected_line = m_pc_line; - - if (m_file_sp && m_file_sp->FileSpecMatches(m_sc.line_entry.file)) { - // Same file, nothing to do, we should either have the lines or not - // (source file missing) - if (m_selected_line >= static_cast(m_first_visible_line)) { - if (m_selected_line >= m_first_visible_line + num_visible_lines) - m_first_visible_line = m_selected_line - 10; - } else { - if (m_selected_line > 10) - m_first_visible_line = m_selected_line - 10; - else - m_first_visible_line = 0; - } - } else { - // File changed, set selected line to the line with the PC - m_selected_line = m_pc_line; - m_file_sp = - m_debugger.GetSourceManager().GetFile(m_sc.line_entry.file); - if (m_file_sp) { - const size_t num_lines = m_file_sp->GetNumLines(); - m_line_width = 1; - for (size_t n = num_lines; n >= 10; n = n / 10) - ++m_line_width; - - if (num_lines < num_visible_lines || - m_selected_line < num_visible_lines) - m_first_visible_line = 0; - else - m_first_visible_line = m_selected_line - 10; - } - } - } else { - m_file_sp.reset(); - } - - if (!m_file_sp || m_file_sp->GetNumLines() == 0) { - // Show disassembly - bool prefer_file_cache = false; - if (m_sc.function) { - if (m_disassembly_scope != m_sc.function) { - m_disassembly_scope = m_sc.function; - m_disassembly_sp = m_sc.function->GetInstructions( - exe_ctx, nullptr, prefer_file_cache); - if (m_disassembly_sp) { - set_selected_line_to_pc = true; - m_disassembly_range = m_sc.function->GetAddressRange(); - } else { - m_disassembly_range.Clear(); - } - } else { - set_selected_line_to_pc = context_changed; - } - } else if (m_sc.symbol) { - if (m_disassembly_scope != m_sc.symbol) { - m_disassembly_scope = m_sc.symbol; - m_disassembly_sp = m_sc.symbol->GetInstructions( - exe_ctx, nullptr, prefer_file_cache); - if (m_disassembly_sp) { - set_selected_line_to_pc = true; - m_disassembly_range.GetBaseAddress() = - m_sc.symbol->GetAddress(); - m_disassembly_range.SetByteSize(m_sc.symbol->GetByteSize()); - } else { - m_disassembly_range.Clear(); - } - } else { - set_selected_line_to_pc = context_changed; - } - } - } - } else { - m_pc_line = UINT32_MAX; - } - } - - const int window_width = window.GetWidth(); - window.Erase(); - window.DrawTitleBox("Sources"); - if (!m_title.GetString().empty()) { - window.AttributeOn(A_REVERSE); - window.MoveCursor(1, 1); - window.PutChar(' '); - window.PutCStringTruncated(m_title.GetString().str().c_str(), 1); - int x = window.GetCursorX(); - if (x < window_width - 1) { - window.Printf("%*s", 
window_width - x - 1, ""); - } - window.AttributeOff(A_REVERSE); - } - - Target *target = exe_ctx.GetTargetPtr(); - const size_t num_source_lines = GetNumSourceLines(); - if (num_source_lines > 0) { - // Display source - BreakpointLines bp_lines; - if (target) { - BreakpointList &bp_list = target->GetBreakpointList(); - const size_t num_bps = bp_list.GetSize(); - for (size_t bp_idx = 0; bp_idx < num_bps; ++bp_idx) { - BreakpointSP bp_sp = bp_list.GetBreakpointAtIndex(bp_idx); - const size_t num_bps_locs = bp_sp->GetNumLocations(); - for (size_t bp_loc_idx = 0; bp_loc_idx < num_bps_locs; ++bp_loc_idx) { - BreakpointLocationSP bp_loc_sp = - bp_sp->GetLocationAtIndex(bp_loc_idx); - LineEntry bp_loc_line_entry; - if (bp_loc_sp->GetAddress().CalculateSymbolContextLineEntry( - bp_loc_line_entry)) { - if (m_file_sp->GetFileSpec() == bp_loc_line_entry.file) { - bp_lines.insert(bp_loc_line_entry.line); - } - } - } - } - } - - const attr_t selected_highlight_attr = A_REVERSE; - const attr_t pc_highlight_attr = COLOR_PAIR(1); - - for (size_t i = 0; i < num_visible_lines; ++i) { - const uint32_t curr_line = m_first_visible_line + i; - if (curr_line < num_source_lines) { - const int line_y = m_min_y + i; - window.MoveCursor(1, line_y); - const bool is_pc_line = curr_line == m_pc_line; - const bool line_is_selected = m_selected_line == curr_line; - // Highlight the line as the PC line first, then if the selected line - // isn't the same as the PC line, highlight it differently - attr_t highlight_attr = 0; - attr_t bp_attr = 0; - if (is_pc_line) - highlight_attr = pc_highlight_attr; - else if (line_is_selected) - highlight_attr = selected_highlight_attr; - - if (bp_lines.find(curr_line + 1) != bp_lines.end()) - bp_attr = COLOR_PAIR(2); - - if (bp_attr) - window.AttributeOn(bp_attr); - - window.Printf(" %*u ", m_line_width, curr_line + 1); - - if (bp_attr) - window.AttributeOff(bp_attr); - - window.PutChar(ACS_VLINE); - // Mark the line with the PC with a diamond - if (is_pc_line) - window.PutChar(ACS_DIAMOND); - else - window.PutChar(' '); - - if (highlight_attr) - window.AttributeOn(highlight_attr); - const uint32_t line_len = - m_file_sp->GetLineLength(curr_line + 1, false); - if (line_len > 0) - window.PutCString(m_file_sp->PeekLineData(curr_line + 1), line_len); - - if (is_pc_line && frame_sp && - frame_sp->GetConcreteFrameIndex() == 0) { - StopInfoSP stop_info_sp; - if (thread) - stop_info_sp = thread->GetStopInfo(); - if (stop_info_sp) { - const char *stop_description = stop_info_sp->GetDescription(); - if (stop_description && stop_description[0]) { - size_t stop_description_len = strlen(stop_description); - int desc_x = window_width - stop_description_len - 16; - window.Printf("%*s", desc_x - window.GetCursorX(), ""); - // window.MoveCursor(window_width - stop_description_len - 15, - // line_y); - window.Printf("<<< Thread %u: %s ", thread->GetIndexID(), - stop_description); - } - } else { - window.Printf("%*s", window_width - window.GetCursorX() - 1, ""); - } - } - if (highlight_attr) - window.AttributeOff(highlight_attr); - } else { - break; - } - } - } else { - size_t num_disassembly_lines = GetNumDisassemblyLines(); - if (num_disassembly_lines > 0) { - // Display disassembly - BreakpointAddrs bp_file_addrs; - Target *target = exe_ctx.GetTargetPtr(); - if (target) { - BreakpointList &bp_list = target->GetBreakpointList(); - const size_t num_bps = bp_list.GetSize(); - for (size_t bp_idx = 0; bp_idx < num_bps; ++bp_idx) { - BreakpointSP bp_sp = bp_list.GetBreakpointAtIndex(bp_idx); - const size_t 
num_bps_locs = bp_sp->GetNumLocations(); - for (size_t bp_loc_idx = 0; bp_loc_idx < num_bps_locs; - ++bp_loc_idx) { - BreakpointLocationSP bp_loc_sp = - bp_sp->GetLocationAtIndex(bp_loc_idx); - LineEntry bp_loc_line_entry; - const lldb::addr_t file_addr = - bp_loc_sp->GetAddress().GetFileAddress(); - if (file_addr != LLDB_INVALID_ADDRESS) { - if (m_disassembly_range.ContainsFileAddress(file_addr)) - bp_file_addrs.insert(file_addr); - } - } - } - } - - const attr_t selected_highlight_attr = A_REVERSE; - const attr_t pc_highlight_attr = COLOR_PAIR(1); - - StreamString strm; - - InstructionList &insts = m_disassembly_sp->GetInstructionList(); - Address pc_address; - - if (frame_sp) - pc_address = frame_sp->GetFrameCodeAddress(); - const uint32_t pc_idx = - pc_address.IsValid() - ? insts.GetIndexOfInstructionAtAddress(pc_address) - : UINT32_MAX; - if (set_selected_line_to_pc) { - m_selected_line = pc_idx; - } - - const uint32_t non_visible_pc_offset = (num_visible_lines / 5); - if (static_cast(m_first_visible_line) >= num_disassembly_lines) - m_first_visible_line = 0; - - if (pc_idx < num_disassembly_lines) { - if (pc_idx < static_cast(m_first_visible_line) || - pc_idx >= m_first_visible_line + num_visible_lines) - m_first_visible_line = pc_idx - non_visible_pc_offset; - } - - for (size_t i = 0; i < num_visible_lines; ++i) { - const uint32_t inst_idx = m_first_visible_line + i; - Instruction *inst = insts.GetInstructionAtIndex(inst_idx).get(); - if (!inst) - break; - - const int line_y = m_min_y + i; - window.MoveCursor(1, line_y); - const bool is_pc_line = frame_sp && inst_idx == pc_idx; - const bool line_is_selected = m_selected_line == inst_idx; - // Highlight the line as the PC line first, then if the selected line - // isn't the same as the PC line, highlight it differently - attr_t highlight_attr = 0; - attr_t bp_attr = 0; - if (is_pc_line) - highlight_attr = pc_highlight_attr; - else if (line_is_selected) - highlight_attr = selected_highlight_attr; - - if (bp_file_addrs.find(inst->GetAddress().GetFileAddress()) != - bp_file_addrs.end()) - bp_attr = COLOR_PAIR(2); - - if (bp_attr) - window.AttributeOn(bp_attr); - - window.Printf(" 0x%16.16llx ", - static_cast( - inst->GetAddress().GetLoadAddress(target))); - - if (bp_attr) - window.AttributeOff(bp_attr); - - window.PutChar(ACS_VLINE); - // Mark the line with the PC with a diamond - if (is_pc_line) - window.PutChar(ACS_DIAMOND); - else - window.PutChar(' '); - - if (highlight_attr) - window.AttributeOn(highlight_attr); - - const char *mnemonic = inst->GetMnemonic(&exe_ctx); - const char *operands = inst->GetOperands(&exe_ctx); - const char *comment = inst->GetComment(&exe_ctx); - - if (mnemonic != nullptr && mnemonic[0] == '\0') - mnemonic = nullptr; - if (operands != nullptr && operands[0] == '\0') - operands = nullptr; - if (comment != nullptr && comment[0] == '\0') - comment = nullptr; - - strm.Clear(); - - if (mnemonic != nullptr && operands != nullptr && comment != nullptr) - strm.Printf("%-8s %-25s ; %s", mnemonic, operands, comment); - else if (mnemonic != nullptr && operands != nullptr) - strm.Printf("%-8s %s", mnemonic, operands); - else if (mnemonic != nullptr) - strm.Printf("%s", mnemonic); - - int right_pad = 1; - window.PutCStringTruncated(strm.GetData(), right_pad); - - if (is_pc_line && frame_sp && - frame_sp->GetConcreteFrameIndex() == 0) { - StopInfoSP stop_info_sp; - if (thread) - stop_info_sp = thread->GetStopInfo(); - if (stop_info_sp) { - const char *stop_description = stop_info_sp->GetDescription(); - if 
(stop_description && stop_description[0]) { - size_t stop_description_len = strlen(stop_description); - int desc_x = window_width - stop_description_len - 16; - window.Printf("%*s", desc_x - window.GetCursorX(), ""); - // window.MoveCursor(window_width - stop_description_len - 15, - // line_y); - window.Printf("<<< Thread %u: %s ", thread->GetIndexID(), - stop_description); - } - } else { - window.Printf("%*s", window_width - window.GetCursorX() - 1, ""); - } - } - if (highlight_attr) - window.AttributeOff(highlight_attr); - } - } - } - return true; // Drawing handled - } - - size_t GetNumLines() { - size_t num_lines = GetNumSourceLines(); - if (num_lines == 0) - num_lines = GetNumDisassemblyLines(); - return num_lines; - } - - size_t GetNumSourceLines() const { - if (m_file_sp) - return m_file_sp->GetNumLines(); - return 0; - } - - size_t GetNumDisassemblyLines() const { - if (m_disassembly_sp) - return m_disassembly_sp->GetInstructionList().GetSize(); - return 0; - } - - HandleCharResult WindowDelegateHandleChar(Window &window, int c) override { - const uint32_t num_visible_lines = NumVisibleLines(); - const size_t num_lines = GetNumLines(); - - switch (c) { - case ',': - case KEY_PPAGE: - // Page up key - if (static_cast(m_first_visible_line) > num_visible_lines) - m_first_visible_line -= num_visible_lines; - else - m_first_visible_line = 0; - m_selected_line = m_first_visible_line; - return eKeyHandled; - - case '.': - case KEY_NPAGE: - // Page down key - { - if (m_first_visible_line + num_visible_lines < num_lines) - m_first_visible_line += num_visible_lines; - else if (num_lines < num_visible_lines) - m_first_visible_line = 0; - else - m_first_visible_line = num_lines - num_visible_lines; - m_selected_line = m_first_visible_line; - } - return eKeyHandled; - - case KEY_UP: - if (m_selected_line > 0) { - m_selected_line--; - if (static_cast(m_first_visible_line) > m_selected_line) - m_first_visible_line = m_selected_line; - } - return eKeyHandled; - - case KEY_DOWN: - if (m_selected_line + 1 < num_lines) { - m_selected_line++; - if (m_first_visible_line + num_visible_lines < m_selected_line) - m_first_visible_line++; - } - return eKeyHandled; - - case '\r': - case '\n': - case KEY_ENTER: - // Set a breakpoint and run to the line using a one shot breakpoint - if (GetNumSourceLines() > 0) { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasProcessScope() && exe_ctx.GetProcessRef().IsAlive()) { - BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( - nullptr, // Don't limit the breakpoint to certain modules - m_file_sp->GetFileSpec(), // Source file - m_selected_line + - 1, // Source line number (m_selected_line is zero based) - 0, // Unspecified column. 
- 0, // No offset - eLazyBoolCalculate, // Check inlines using global setting - eLazyBoolCalculate, // Skip prologue using global setting, - false, // internal - false, // request_hardware - eLazyBoolCalculate); // move_to_nearest_code - // Make breakpoint one shot - bp_sp->GetOptions()->SetOneShot(true); - exe_ctx.GetProcessRef().Resume(); - } - } else if (m_selected_line < GetNumDisassemblyLines()) { - const Instruction *inst = m_disassembly_sp->GetInstructionList() - .GetInstructionAtIndex(m_selected_line) - .get(); - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasTargetScope()) { - Address addr = inst->GetAddress(); - BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( - addr, // lldb_private::Address - false, // internal - false); // request_hardware - // Make breakpoint one shot - bp_sp->GetOptions()->SetOneShot(true); - exe_ctx.GetProcessRef().Resume(); - } - } - return eKeyHandled; - - case 'b': // 'b' == toggle breakpoint on currently selected line - if (m_selected_line < GetNumSourceLines()) { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasTargetScope()) { - BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( - nullptr, // Don't limit the breakpoint to certain modules - m_file_sp->GetFileSpec(), // Source file - m_selected_line + - 1, // Source line number (m_selected_line is zero based) - 0, // No column specified. - 0, // No offset - eLazyBoolCalculate, // Check inlines using global setting - eLazyBoolCalculate, // Skip prologue using global setting, - false, // internal - false, // request_hardware - eLazyBoolCalculate); // move_to_nearest_code - } - } else if (m_selected_line < GetNumDisassemblyLines()) { - const Instruction *inst = m_disassembly_sp->GetInstructionList() - .GetInstructionAtIndex(m_selected_line) - .get(); - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasTargetScope()) { - Address addr = inst->GetAddress(); - BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( - addr, // lldb_private::Address - false, // internal - false); // request_hardware - } - } - return eKeyHandled; - - case 'd': // 'd' == detach and let run - case 'D': // 'D' == detach and keep stopped - { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasProcessScope()) - exe_ctx.GetProcessRef().Detach(c == 'D'); - } - return eKeyHandled; - - case 'k': - // 'k' == kill - { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasProcessScope()) - exe_ctx.GetProcessRef().Destroy(false); - } - return eKeyHandled; - - case 'c': - // 'c' == continue - { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasProcessScope()) - exe_ctx.GetProcessRef().Resume(); - } - return eKeyHandled; - - case 'o': - // 'o' == step out - { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasThreadScope() && - StateIsStoppedState(exe_ctx.GetProcessRef().GetState(), true)) { - exe_ctx.GetThreadRef().StepOut(); - } - } - return eKeyHandled; - - case 'n': // 'n' == step over - case 'N': // 'N' == step over instruction - { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasThreadScope() && - StateIsStoppedState(exe_ctx.GetProcessRef().GetState(), true)) { - bool 
source_step = (c == 'n'); - exe_ctx.GetThreadRef().StepOver(source_step); - } - } - return eKeyHandled; - - case 's': // 's' == step into - case 'S': // 'S' == step into instruction - { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasThreadScope() && - StateIsStoppedState(exe_ctx.GetProcessRef().GetState(), true)) { - bool source_step = (c == 's'); - exe_ctx.GetThreadRef().StepIn(source_step); - } - } - return eKeyHandled; - - case 'h': - window.CreateHelpSubwindow(); - return eKeyHandled; - - default: - break; - } - return eKeyNotHandled; - } - -protected: - typedef std::set BreakpointLines; - typedef std::set BreakpointAddrs; - - Debugger &m_debugger; - SymbolContext m_sc; - SourceManager::FileSP m_file_sp; - SymbolContextScope *m_disassembly_scope; - lldb::DisassemblerSP m_disassembly_sp; - AddressRange m_disassembly_range; - StreamString m_title; - lldb::user_id_t m_tid; - int m_line_width; - uint32_t m_selected_line; // The selected line - uint32_t m_pc_line; // The line with the PC - uint32_t m_stop_id; - uint32_t m_frame_idx; - int m_first_visible_line; - int m_min_x; - int m_min_y; - int m_max_x; - int m_max_y; -}; - -DisplayOptions ValueObjectListDelegate::g_options = {true}; - -IOHandlerCursesGUI::IOHandlerCursesGUI(Debugger &debugger) - : IOHandler(debugger, IOHandler::Type::Curses) {} - -void IOHandlerCursesGUI::Activate() { - IOHandler::Activate(); - if (!m_app_ap) { - m_app_ap.reset(new Application(GetInputFILE(), GetOutputFILE())); - - // This is both a window and a menu delegate - std::shared_ptr app_delegate_sp( - new ApplicationDelegate(*m_app_ap, m_debugger)); - - MenuDelegateSP app_menu_delegate_sp = - std::static_pointer_cast(app_delegate_sp); - MenuSP lldb_menu_sp( - new Menu("LLDB", "F1", KEY_F(1), ApplicationDelegate::eMenuID_LLDB)); - MenuSP exit_menuitem_sp( - new Menu("Exit", nullptr, 'x', ApplicationDelegate::eMenuID_LLDBExit)); - exit_menuitem_sp->SetCannedResult(MenuActionResult::Quit); - lldb_menu_sp->AddSubmenu(MenuSP(new Menu( - "About LLDB", nullptr, 'a', ApplicationDelegate::eMenuID_LLDBAbout))); - lldb_menu_sp->AddSubmenu(MenuSP(new Menu(Menu::Type::Separator))); - lldb_menu_sp->AddSubmenu(exit_menuitem_sp); - - MenuSP target_menu_sp(new Menu("Target", "F2", KEY_F(2), - ApplicationDelegate::eMenuID_Target)); - target_menu_sp->AddSubmenu(MenuSP(new Menu( - "Create", nullptr, 'c', ApplicationDelegate::eMenuID_TargetCreate))); - target_menu_sp->AddSubmenu(MenuSP(new Menu( - "Delete", nullptr, 'd', ApplicationDelegate::eMenuID_TargetDelete))); - - MenuSP process_menu_sp(new Menu("Process", "F3", KEY_F(3), - ApplicationDelegate::eMenuID_Process)); - process_menu_sp->AddSubmenu(MenuSP(new Menu( - "Attach", nullptr, 'a', ApplicationDelegate::eMenuID_ProcessAttach))); - process_menu_sp->AddSubmenu(MenuSP(new Menu( - "Detach", nullptr, 'd', ApplicationDelegate::eMenuID_ProcessDetach))); - process_menu_sp->AddSubmenu(MenuSP(new Menu( - "Launch", nullptr, 'l', ApplicationDelegate::eMenuID_ProcessLaunch))); - process_menu_sp->AddSubmenu(MenuSP(new Menu(Menu::Type::Separator))); - process_menu_sp->AddSubmenu( - MenuSP(new Menu("Continue", nullptr, 'c', - ApplicationDelegate::eMenuID_ProcessContinue))); - process_menu_sp->AddSubmenu(MenuSP(new Menu( - "Halt", nullptr, 'h', ApplicationDelegate::eMenuID_ProcessHalt))); - process_menu_sp->AddSubmenu(MenuSP(new Menu( - "Kill", nullptr, 'k', ApplicationDelegate::eMenuID_ProcessKill))); - - MenuSP thread_menu_sp(new Menu("Thread", "F4", KEY_F(4), - 
ApplicationDelegate::eMenuID_Thread)); - thread_menu_sp->AddSubmenu(MenuSP(new Menu( - "Step In", nullptr, 'i', ApplicationDelegate::eMenuID_ThreadStepIn))); - thread_menu_sp->AddSubmenu( - MenuSP(new Menu("Step Over", nullptr, 'v', - ApplicationDelegate::eMenuID_ThreadStepOver))); - thread_menu_sp->AddSubmenu(MenuSP(new Menu( - "Step Out", nullptr, 'o', ApplicationDelegate::eMenuID_ThreadStepOut))); - - MenuSP view_menu_sp( - new Menu("View", "F5", KEY_F(5), ApplicationDelegate::eMenuID_View)); - view_menu_sp->AddSubmenu( - MenuSP(new Menu("Backtrace", nullptr, 'b', - ApplicationDelegate::eMenuID_ViewBacktrace))); - view_menu_sp->AddSubmenu( - MenuSP(new Menu("Registers", nullptr, 'r', - ApplicationDelegate::eMenuID_ViewRegisters))); - view_menu_sp->AddSubmenu(MenuSP(new Menu( - "Source", nullptr, 's', ApplicationDelegate::eMenuID_ViewSource))); - view_menu_sp->AddSubmenu( - MenuSP(new Menu("Variables", nullptr, 'v', - ApplicationDelegate::eMenuID_ViewVariables))); - - MenuSP help_menu_sp( - new Menu("Help", "F6", KEY_F(6), ApplicationDelegate::eMenuID_Help)); - help_menu_sp->AddSubmenu(MenuSP(new Menu( - "GUI Help", nullptr, 'g', ApplicationDelegate::eMenuID_HelpGUIHelp))); - - m_app_ap->Initialize(); - WindowSP &main_window_sp = m_app_ap->GetMainWindow(); - - MenuSP menubar_sp(new Menu(Menu::Type::Bar)); - menubar_sp->AddSubmenu(lldb_menu_sp); - menubar_sp->AddSubmenu(target_menu_sp); - menubar_sp->AddSubmenu(process_menu_sp); - menubar_sp->AddSubmenu(thread_menu_sp); - menubar_sp->AddSubmenu(view_menu_sp); - menubar_sp->AddSubmenu(help_menu_sp); - menubar_sp->SetDelegate(app_menu_delegate_sp); - - Rect content_bounds = main_window_sp->GetFrame(); - Rect menubar_bounds = content_bounds.MakeMenuBar(); - Rect status_bounds = content_bounds.MakeStatusBar(); - Rect source_bounds; - Rect variables_bounds; - Rect threads_bounds; - Rect source_variables_bounds; - content_bounds.VerticalSplitPercentage(0.80, source_variables_bounds, - threads_bounds); - source_variables_bounds.HorizontalSplitPercentage(0.70, source_bounds, - variables_bounds); - - WindowSP menubar_window_sp = - main_window_sp->CreateSubWindow("Menubar", menubar_bounds, false); - // Let the menubar get keys if the active window doesn't handle the keys - // that are typed so it can respond to menubar key presses. 
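// Editor's note: a minimal sketch (helper name hypothetical) of the focus
// contract the comment above relies on. A subwindow created with
// SetCanBeActive(false) is skipped by the TAB rotation in
// SelectNextWindowAsActive(), but Window::HandleChar() still offers it any key
// the active view left unhandled -- which is how F1-F6 reach the menubar from
// any view.
WindowSP MakeMenubar(const WindowSP &main_window_sp, const Rect &menubar_bounds,
                     const MenuSP &menubar_sp) {
  WindowSP menubar =
      main_window_sp->CreateSubWindow("Menubar", menubar_bounds, false);
  menubar->SetCanBeActive(false); // keep it out of the TAB focus cycle
  menubar->SetDelegate(menubar_sp); // Menu is itself a WindowDelegate
  return menubar;
}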
- menubar_window_sp->SetCanBeActive( - false); // Don't let the menubar become the active window - menubar_window_sp->SetDelegate(menubar_sp); - - WindowSP source_window_sp( - main_window_sp->CreateSubWindow("Source", source_bounds, true)); - WindowSP variables_window_sp( - main_window_sp->CreateSubWindow("Variables", variables_bounds, false)); - WindowSP threads_window_sp( - main_window_sp->CreateSubWindow("Threads", threads_bounds, false)); - WindowSP status_window_sp( - main_window_sp->CreateSubWindow("Status", status_bounds, false)); - status_window_sp->SetCanBeActive( - false); // Don't let the status bar become the active window - main_window_sp->SetDelegate( - std::static_pointer_cast(app_delegate_sp)); - source_window_sp->SetDelegate( - WindowDelegateSP(new SourceFileWindowDelegate(m_debugger))); - variables_window_sp->SetDelegate( - WindowDelegateSP(new FrameVariablesWindowDelegate(m_debugger))); - TreeDelegateSP thread_delegate_sp(new ThreadsTreeDelegate(m_debugger)); - threads_window_sp->SetDelegate(WindowDelegateSP( - new TreeWindowDelegate(m_debugger, thread_delegate_sp))); - status_window_sp->SetDelegate( - WindowDelegateSP(new StatusBarWindowDelegate(m_debugger))); - - // Show the main help window once the first time the curses GUI is launched - static bool g_showed_help = false; - if (!g_showed_help) { - g_showed_help = true; - main_window_sp->CreateHelpSubwindow(); - } - - init_pair(1, COLOR_WHITE, COLOR_BLUE); - init_pair(2, COLOR_BLACK, COLOR_WHITE); - init_pair(3, COLOR_MAGENTA, COLOR_WHITE); - init_pair(4, COLOR_MAGENTA, COLOR_BLACK); - init_pair(5, COLOR_RED, COLOR_BLACK); - } -} - -void IOHandlerCursesGUI::Deactivate() { m_app_ap->Terminate(); } - -void IOHandlerCursesGUI::Run() { - m_app_ap->Run(m_debugger); - SetIsDone(true); -} - -IOHandlerCursesGUI::~IOHandlerCursesGUI() = default; - -void IOHandlerCursesGUI::Cancel() {} - -bool IOHandlerCursesGUI::Interrupt() { return false; } - -void IOHandlerCursesGUI::GotEOF() {} - -#endif // LLDB_DISABLE_CURSES diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp new file mode 100644 index 0000000000000..a9114aa71b069 --- /dev/null +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -0,0 +1,4066 @@ +//===-- IOHandlerCursesGUI.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Core/IOHandlerCursesGUI.h"
+
+#ifndef LLDB_DISABLE_CURSES
+#include <curses.h>
+#include <panel.h>
+#endif
+
+#if defined(__APPLE__)
+#include <deque>
+#endif
+#include <string>
+
+#include "lldb/Core/Debugger.h"
+#include "lldb/Core/StreamFile.h"
+#include "lldb/Host/File.h"
+#include "lldb/Utility/Predicate.h"
+#include "lldb/Utility/Status.h"
+#include "lldb/Utility/StreamString.h"
+#include "lldb/Utility/StringList.h"
+#include "lldb/lldb-forward.h"
+
+#include "lldb/Interpreter/CommandCompletions.h"
+#include "lldb/Interpreter/CommandInterpreter.h"
+
+#ifndef LLDB_DISABLE_CURSES
+#include "lldb/Breakpoint/BreakpointLocation.h"
+#include "lldb/Core/Module.h"
+#include "lldb/Core/ValueObject.h"
+#include "lldb/Core/ValueObjectRegister.h"
+#include "lldb/Symbol/Block.h"
+#include "lldb/Symbol/Function.h"
+#include "lldb/Symbol/Symbol.h"
+#include "lldb/Symbol/VariableList.h"
+#include "lldb/Target/Process.h"
+#include "lldb/Target/RegisterContext.h"
+#include "lldb/Target/StackFrame.h"
+#include "lldb/Target/StopInfo.h"
+#include "lldb/Target/Target.h"
+#include "lldb/Target/Thread.h"
+#include "lldb/Utility/State.h"
+#endif
+
+#include "llvm/ADT/StringRef.h"
+
+#ifdef _WIN32
+#include "lldb/Host/windows/windows.h"
+#endif
+
+#include <memory>
+#include <mutex>
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <locale.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+using namespace lldb;
+using namespace lldb_private;
+using llvm::None;
+using llvm::Optional;
+using llvm::StringRef;
+
+// Curses may be disabled for some builds, for instance on Windows.
+#ifndef LLDB_DISABLE_CURSES
+
+#define KEY_RETURN 10
+#define KEY_ESCAPE 27
+
+namespace curses {
+class Menu;
+class MenuDelegate;
+class Window;
+class WindowDelegate;
+typedef std::shared_ptr<Menu> MenuSP;
+typedef std::shared_ptr<MenuDelegate> MenuDelegateSP;
+typedef std::shared_ptr<Window> WindowSP;
+typedef std::shared_ptr<WindowDelegate> WindowDelegateSP;
+typedef std::vector<MenuSP> Menus;
+typedef std::vector<WindowSP> Windows;
+typedef std::vector<WindowDelegateSP> WindowDelegates;
+
+#if 0
+type summary add -s "x=${var.x}, y=${var.y}" curses::Point
+type summary add -s "w=${var.width}, h=${var.height}" curses::Size
+type summary add -s "${var.origin%S} ${var.size%S}" curses::Rect
+#endif
+
+struct Point {
+  int x;
+  int y;
+
+  Point(int _x = 0, int _y = 0) : x(_x), y(_y) {}
+
+  void Clear() {
+    x = 0;
+    y = 0;
+  }
+
+  Point &operator+=(const Point &rhs) {
+    x += rhs.x;
+    y += rhs.y;
+    return *this;
+  }
+
+  void Dump() { printf("(x=%i, y=%i)\n", x, y); }
+};
+
+bool operator==(const Point &lhs, const Point &rhs) {
+  return lhs.x == rhs.x && lhs.y == rhs.y;
+}
+
+bool operator!=(const Point &lhs, const Point &rhs) {
+  return lhs.x != rhs.x || lhs.y != rhs.y;
+}
+
+struct Size {
+  int width;
+  int height;
+  Size(int w = 0, int h = 0) : width(w), height(h) {}
+
+  void Clear() {
+    width = 0;
+    height = 0;
+  }
+
+  void Dump() { printf("(w=%i, h=%i)\n", width, height); }
+};
+
+bool operator==(const Size &lhs, const Size &rhs) {
+  return lhs.width == rhs.width && lhs.height == rhs.height;
+}
+
+bool operator!=(const Size &lhs, const Size &rhs) {
+  return lhs.width != rhs.width || lhs.height != rhs.height;
+}
+
+struct Rect {
+  Point origin;
+  Size size;
+
+  Rect() : origin(), size() {}
+
+  Rect(const Point &p, const Size &s) : origin(p), size(s) {}
+
+  void Clear() {
+    origin.Clear();
+    size.Clear();
+  }
+
+  void Dump() {
+    printf("(x=%i, y=%i), w=%i, h=%i)\n", origin.x, origin.y, size.width,
+           size.height);
+  }
+
+ 
void Inset(int w, int h) { + if (size.width > w * 2) + size.width -= w * 2; + origin.x += w; + + if (size.height > h * 2) + size.height -= h * 2; + origin.y += h; + } + + // Return a status bar rectangle which is the last line of this rectangle. + // This rectangle will be modified to not include the status bar area. + Rect MakeStatusBar() { + Rect status_bar; + if (size.height > 1) { + status_bar.origin.x = origin.x; + status_bar.origin.y = size.height; + status_bar.size.width = size.width; + status_bar.size.height = 1; + --size.height; + } + return status_bar; + } + + // Return a menubar rectangle which is the first line of this rectangle. This + // rectangle will be modified to not include the menubar area. + Rect MakeMenuBar() { + Rect menubar; + if (size.height > 1) { + menubar.origin.x = origin.x; + menubar.origin.y = origin.y; + menubar.size.width = size.width; + menubar.size.height = 1; + ++origin.y; + --size.height; + } + return menubar; + } + + void HorizontalSplitPercentage(float top_percentage, Rect &top, + Rect &bottom) const { + float top_height = top_percentage * size.height; + HorizontalSplit(top_height, top, bottom); + } + + void HorizontalSplit(int top_height, Rect &top, Rect &bottom) const { + top = *this; + if (top_height < size.height) { + top.size.height = top_height; + bottom.origin.x = origin.x; + bottom.origin.y = origin.y + top.size.height; + bottom.size.width = size.width; + bottom.size.height = size.height - top.size.height; + } else { + bottom.Clear(); + } + } + + void VerticalSplitPercentage(float left_percentage, Rect &left, + Rect &right) const { + float left_width = left_percentage * size.width; + VerticalSplit(left_width, left, right); + } + + void VerticalSplit(int left_width, Rect &left, Rect &right) const { + left = *this; + if (left_width < size.width) { + left.size.width = left_width; + right.origin.x = origin.x + left.size.width; + right.origin.y = origin.y; + right.size.width = size.width - left.size.width; + right.size.height = size.height; + } else { + right.Clear(); + } + } +}; + +bool operator==(const Rect &lhs, const Rect &rhs) { + return lhs.origin == rhs.origin && lhs.size == rhs.size; +} + +bool operator!=(const Rect &lhs, const Rect &rhs) { + return lhs.origin != rhs.origin || lhs.size != rhs.size; +} + +enum HandleCharResult { + eKeyNotHandled = 0, + eKeyHandled = 1, + eQuitApplication = 2 +}; + +enum class MenuActionResult { + Handled, + NotHandled, + Quit // Exit all menus and quit +}; + +struct KeyHelp { + int ch; + const char *description; +}; + +class WindowDelegate { +public: + virtual ~WindowDelegate() = default; + + virtual bool WindowDelegateDraw(Window &window, bool force) { + return false; // Drawing not handled + } + + virtual HandleCharResult WindowDelegateHandleChar(Window &window, int key) { + return eKeyNotHandled; + } + + virtual const char *WindowDelegateGetHelpText() { return nullptr; } + + virtual KeyHelp *WindowDelegateGetKeyHelp() { return nullptr; } +}; + +class HelpDialogDelegate : public WindowDelegate { +public: + HelpDialogDelegate(const char *text, KeyHelp *key_help_array); + + ~HelpDialogDelegate() override; + + bool WindowDelegateDraw(Window &window, bool force) override; + + HandleCharResult WindowDelegateHandleChar(Window &window, int key) override; + + size_t GetNumLines() const { return m_text.GetSize(); } + + size_t GetMaxLineLength() const { return m_text.GetMaxStringLength(); } + +protected: + StringList m_text; + int m_first_visible_line; +}; + +class Window { +public: + Window(const char *name) + : 
m_name(name), m_window(nullptr), m_panel(nullptr), m_parent(nullptr), + m_subwindows(), m_delegate_sp(), m_curr_active_window_idx(UINT32_MAX), + m_prev_active_window_idx(UINT32_MAX), m_delete(false), + m_needs_update(true), m_can_activate(true), m_is_subwin(false) {} + + Window(const char *name, WINDOW *w, bool del = true) + : m_name(name), m_window(nullptr), m_panel(nullptr), m_parent(nullptr), + m_subwindows(), m_delegate_sp(), m_curr_active_window_idx(UINT32_MAX), + m_prev_active_window_idx(UINT32_MAX), m_delete(del), + m_needs_update(true), m_can_activate(true), m_is_subwin(false) { + if (w) + Reset(w); + } + + Window(const char *name, const Rect &bounds) + : m_name(name), m_window(nullptr), m_parent(nullptr), m_subwindows(), + m_delegate_sp(), m_curr_active_window_idx(UINT32_MAX), + m_prev_active_window_idx(UINT32_MAX), m_delete(true), + m_needs_update(true), m_can_activate(true), m_is_subwin(false) { + Reset(::newwin(bounds.size.height, bounds.size.width, bounds.origin.y, + bounds.origin.y)); + } + + virtual ~Window() { + RemoveSubWindows(); + Reset(); + } + + void Reset(WINDOW *w = nullptr, bool del = true) { + if (m_window == w) + return; + + if (m_panel) { + ::del_panel(m_panel); + m_panel = nullptr; + } + if (m_window && m_delete) { + ::delwin(m_window); + m_window = nullptr; + m_delete = false; + } + if (w) { + m_window = w; + m_panel = ::new_panel(m_window); + m_delete = del; + } + } + + void AttributeOn(attr_t attr) { ::wattron(m_window, attr); } + void AttributeOff(attr_t attr) { ::wattroff(m_window, attr); } + void Box(chtype v_char = ACS_VLINE, chtype h_char = ACS_HLINE) { + ::box(m_window, v_char, h_char); + } + void Clear() { ::wclear(m_window); } + void Erase() { ::werase(m_window); } + Rect GetBounds() { + return Rect(GetParentOrigin(), GetSize()); + } // Get the rectangle in our parent window + int GetChar() { return ::wgetch(m_window); } + int GetCursorX() { return getcurx(m_window); } + int GetCursorY() { return getcury(m_window); } + Rect GetFrame() { + return Rect(Point(), GetSize()); + } // Get our rectangle in our own coordinate system + Point GetParentOrigin() { return Point(GetParentX(), GetParentY()); } + Size GetSize() { return Size(GetWidth(), GetHeight()); } + int GetParentX() { return getparx(m_window); } + int GetParentY() { return getpary(m_window); } + int GetMaxX() { return getmaxx(m_window); } + int GetMaxY() { return getmaxy(m_window); } + int GetWidth() { return GetMaxX(); } + int GetHeight() { return GetMaxY(); } + void MoveCursor(int x, int y) { ::wmove(m_window, y, x); } + void MoveWindow(int x, int y) { MoveWindow(Point(x, y)); } + void Resize(int w, int h) { ::wresize(m_window, h, w); } + void Resize(const Size &size) { + ::wresize(m_window, size.height, size.width); + } + void PutChar(int ch) { ::waddch(m_window, ch); } + void PutCString(const char *s, int len = -1) { ::waddnstr(m_window, s, len); } + void SetBackground(int color_pair_idx) { + ::wbkgd(m_window, COLOR_PAIR(color_pair_idx)); + } + + void PutCStringTruncated(const char *s, int right_pad) { + int bytes_left = GetWidth() - GetCursorX(); + if (bytes_left > right_pad) { + bytes_left -= right_pad; + ::waddnstr(m_window, s, bytes_left); + } + } + + void MoveWindow(const Point &origin) { + const bool moving_window = origin != GetParentOrigin(); + if (m_is_subwin && moving_window) { + // Can't move subwindows, must delete and re-create + Size size = GetSize(); + Reset(::subwin(m_parent->m_window, size.height, size.width, origin.y, + origin.x), + true); + } else { + ::mvwin(m_window, 
origin.y, origin.x); + } + } + + void SetBounds(const Rect &bounds) { + const bool moving_window = bounds.origin != GetParentOrigin(); + if (m_is_subwin && moving_window) { + // Can't move subwindows, must delete and re-create + Reset(::subwin(m_parent->m_window, bounds.size.height, bounds.size.width, + bounds.origin.y, bounds.origin.x), + true); + } else { + if (moving_window) + MoveWindow(bounds.origin); + Resize(bounds.size); + } + } + + void Printf(const char *format, ...) __attribute__((format(printf, 2, 3))) { + va_list args; + va_start(args, format); + vwprintw(m_window, format, args); + va_end(args); + } + + void Touch() { + ::touchwin(m_window); + if (m_parent) + m_parent->Touch(); + } + + WindowSP CreateSubWindow(const char *name, const Rect &bounds, + bool make_active) { + auto get_window = [this, &bounds]() { + return m_window + ? ::subwin(m_window, bounds.size.height, bounds.size.width, + bounds.origin.y, bounds.origin.x) + : ::newwin(bounds.size.height, bounds.size.width, + bounds.origin.y, bounds.origin.x); + }; + WindowSP subwindow_sp = std::make_shared(name, get_window(), true); + subwindow_sp->m_is_subwin = subwindow_sp.operator bool(); + subwindow_sp->m_parent = this; + if (make_active) { + m_prev_active_window_idx = m_curr_active_window_idx; + m_curr_active_window_idx = m_subwindows.size(); + } + m_subwindows.push_back(subwindow_sp); + ::top_panel(subwindow_sp->m_panel); + m_needs_update = true; + return subwindow_sp; + } + + bool RemoveSubWindow(Window *window) { + Windows::iterator pos, end = m_subwindows.end(); + size_t i = 0; + for (pos = m_subwindows.begin(); pos != end; ++pos, ++i) { + if ((*pos).get() == window) { + if (m_prev_active_window_idx == i) + m_prev_active_window_idx = UINT32_MAX; + else if (m_prev_active_window_idx != UINT32_MAX && + m_prev_active_window_idx > i) + --m_prev_active_window_idx; + + if (m_curr_active_window_idx == i) + m_curr_active_window_idx = UINT32_MAX; + else if (m_curr_active_window_idx != UINT32_MAX && + m_curr_active_window_idx > i) + --m_curr_active_window_idx; + window->Erase(); + m_subwindows.erase(pos); + m_needs_update = true; + if (m_parent) + m_parent->Touch(); + else + ::touchwin(stdscr); + return true; + } + } + return false; + } + + WindowSP FindSubWindow(const char *name) { + Windows::iterator pos, end = m_subwindows.end(); + size_t i = 0; + for (pos = m_subwindows.begin(); pos != end; ++pos, ++i) { + if ((*pos)->m_name == name) + return *pos; + } + return WindowSP(); + } + + void RemoveSubWindows() { + m_curr_active_window_idx = UINT32_MAX; + m_prev_active_window_idx = UINT32_MAX; + for (Windows::iterator pos = m_subwindows.begin(); + pos != m_subwindows.end(); pos = m_subwindows.erase(pos)) { + (*pos)->Erase(); + } + if (m_parent) + m_parent->Touch(); + else + ::touchwin(stdscr); + } + + WINDOW *get() { return m_window; } + + operator WINDOW *() { return m_window; } + + // Window drawing utilities + void DrawTitleBox(const char *title, const char *bottom_message = nullptr) { + attr_t attr = 0; + if (IsActive()) + attr = A_BOLD | COLOR_PAIR(2); + else + attr = 0; + if (attr) + AttributeOn(attr); + + Box(); + MoveCursor(3, 0); + + if (title && title[0]) { + PutChar('<'); + PutCString(title); + PutChar('>'); + } + + if (bottom_message && bottom_message[0]) { + int bottom_message_length = strlen(bottom_message); + int x = GetWidth() - 3 - (bottom_message_length + 2); + + if (x > 0) { + MoveCursor(x, GetHeight() - 1); + PutChar('['); + PutCString(bottom_message); + PutChar(']'); + } else { + MoveCursor(1, GetHeight() - 1); 
+        PutChar('[');
+        PutCStringTruncated(bottom_message, 1);
+      }
+    }
+    if (attr)
+      AttributeOff(attr);
+  }
+
+  virtual void Draw(bool force) {
+    if (m_delegate_sp && m_delegate_sp->WindowDelegateDraw(*this, force))
+      return;
+
+    for (auto &subwindow_sp : m_subwindows)
+      subwindow_sp->Draw(force);
+  }
+
+  bool CreateHelpSubwindow() {
+    if (m_delegate_sp) {
+      const char *text = m_delegate_sp->WindowDelegateGetHelpText();
+      KeyHelp *key_help = m_delegate_sp->WindowDelegateGetKeyHelp();
+      if ((text && text[0]) || key_help) {
+        std::unique_ptr<HelpDialogDelegate> help_delegate_up(
+            new HelpDialogDelegate(text, key_help));
+        const size_t num_lines = help_delegate_up->GetNumLines();
+        const size_t max_length = help_delegate_up->GetMaxLineLength();
+        Rect bounds = GetBounds();
+        bounds.Inset(1, 1);
+        if (max_length + 4 < static_cast<size_t>(bounds.size.width)) {
+          bounds.origin.x += (bounds.size.width - max_length + 4) / 2;
+          bounds.size.width = max_length + 4;
+        } else {
+          if (bounds.size.width > 100) {
+            const int inset_w = bounds.size.width / 4;
+            bounds.origin.x += inset_w;
+            bounds.size.width -= 2 * inset_w;
+          }
+        }
+
+        if (num_lines + 2 < static_cast<size_t>(bounds.size.height)) {
+          bounds.origin.y += (bounds.size.height - num_lines + 2) / 2;
+          bounds.size.height = num_lines + 2;
+        } else {
+          if (bounds.size.height > 100) {
+            const int inset_h = bounds.size.height / 4;
+            bounds.origin.y += inset_h;
+            bounds.size.height -= 2 * inset_h;
+          }
+        }
+        WindowSP help_window_sp;
+        Window *parent_window = GetParent();
+        if (parent_window)
+          help_window_sp = parent_window->CreateSubWindow("Help", bounds, true);
+        else
+          help_window_sp = CreateSubWindow("Help", bounds, true);
+        help_window_sp->SetDelegate(
+            WindowDelegateSP(help_delegate_up.release()));
+        return true;
+      }
+    }
+    return false;
+  }
+
+  virtual HandleCharResult HandleChar(int key) {
+    // Always check the active window first
+    HandleCharResult result = eKeyNotHandled;
+    WindowSP active_window_sp = GetActiveWindow();
+    if (active_window_sp) {
+      result = active_window_sp->HandleChar(key);
+      if (result != eKeyNotHandled)
+        return result;
+    }
+
+    if (m_delegate_sp) {
+      result = m_delegate_sp->WindowDelegateHandleChar(*this, key);
+      if (result != eKeyNotHandled)
+        return result;
+    }
+
+    // Then check for any windows that want any keys that weren't handled. This
+    // is typically only for a menubar. Make a copy of the subwindows in case
+    // any HandleChar() functions muck with the subwindows. If we don't do
+    // this, we can crash when iterating over the subwindows.
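// Editor's note: a self-contained sketch of the copy-before-iterate idiom
// described in the comment above and implemented in the loop that follows.
// Erasing from a std::vector invalidates iterators, so the loop walks a
// snapshot while the member vector is free to shrink; copying the shared_ptrs
// also keeps each element alive for the duration. All names here are
// illustrative, not part of the patch.
#include <memory>
#include <vector>
struct Handler {
  bool HandleKey(int key) { return key == 'q'; } // trivial stand-in
};
std::vector<std::shared_ptr<Handler>> g_handlers;
bool Dispatch(int key) {
  auto snapshot = g_handlers; // copy; safe even if handlers mutate g_handlers
  for (const auto &handler_sp : snapshot)
    if (handler_sp->HandleKey(key)) // may erase entries from g_handlers
      return true;
  return false;
}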
+ Windows subwindows(m_subwindows); + for (auto subwindow_sp : subwindows) { + if (!subwindow_sp->m_can_activate) { + HandleCharResult result = subwindow_sp->HandleChar(key); + if (result != eKeyNotHandled) + return result; + } + } + + return eKeyNotHandled; + } + + WindowSP GetActiveWindow() { + if (!m_subwindows.empty()) { + if (m_curr_active_window_idx >= m_subwindows.size()) { + if (m_prev_active_window_idx < m_subwindows.size()) { + m_curr_active_window_idx = m_prev_active_window_idx; + m_prev_active_window_idx = UINT32_MAX; + } else if (IsActive()) { + m_prev_active_window_idx = UINT32_MAX; + m_curr_active_window_idx = UINT32_MAX; + + // Find first window that wants to be active if this window is active + const size_t num_subwindows = m_subwindows.size(); + for (size_t i = 0; i < num_subwindows; ++i) { + if (m_subwindows[i]->GetCanBeActive()) { + m_curr_active_window_idx = i; + break; + } + } + } + } + + if (m_curr_active_window_idx < m_subwindows.size()) + return m_subwindows[m_curr_active_window_idx]; + } + return WindowSP(); + } + + bool GetCanBeActive() const { return m_can_activate; } + + void SetCanBeActive(bool b) { m_can_activate = b; } + + void SetDelegate(const WindowDelegateSP &delegate_sp) { + m_delegate_sp = delegate_sp; + } + + Window *GetParent() const { return m_parent; } + + bool IsActive() const { + if (m_parent) + return m_parent->GetActiveWindow().get() == this; + else + return true; // Top level window is always active + } + + void SelectNextWindowAsActive() { + // Move active focus to next window + const size_t num_subwindows = m_subwindows.size(); + if (m_curr_active_window_idx == UINT32_MAX) { + uint32_t idx = 0; + for (auto subwindow_sp : m_subwindows) { + if (subwindow_sp->GetCanBeActive()) { + m_curr_active_window_idx = idx; + break; + } + ++idx; + } + } else if (m_curr_active_window_idx + 1 < num_subwindows) { + bool handled = false; + m_prev_active_window_idx = m_curr_active_window_idx; + for (size_t idx = m_curr_active_window_idx + 1; idx < num_subwindows; + ++idx) { + if (m_subwindows[idx]->GetCanBeActive()) { + m_curr_active_window_idx = idx; + handled = true; + break; + } + } + if (!handled) { + for (size_t idx = 0; idx <= m_prev_active_window_idx; ++idx) { + if (m_subwindows[idx]->GetCanBeActive()) { + m_curr_active_window_idx = idx; + break; + } + } + } + } else { + m_prev_active_window_idx = m_curr_active_window_idx; + for (size_t idx = 0; idx < num_subwindows; ++idx) { + if (m_subwindows[idx]->GetCanBeActive()) { + m_curr_active_window_idx = idx; + break; + } + } + } + } + + const char *GetName() const { return m_name.c_str(); } + +protected: + std::string m_name; + WINDOW *m_window; + PANEL *m_panel; + Window *m_parent; + Windows m_subwindows; + WindowDelegateSP m_delegate_sp; + uint32_t m_curr_active_window_idx; + uint32_t m_prev_active_window_idx; + bool m_delete; + bool m_needs_update; + bool m_can_activate; + bool m_is_subwin; + +private: + DISALLOW_COPY_AND_ASSIGN(Window); +}; + +class MenuDelegate { +public: + virtual ~MenuDelegate() = default; + + virtual MenuActionResult MenuDelegateAction(Menu &menu) = 0; +}; + +class Menu : public WindowDelegate { +public: + enum class Type { Invalid, Bar, Item, Separator }; + + // Menubar or separator constructor + Menu(Type type); + + // Menuitem constructor + Menu(const char *name, const char *key_name, int key_value, + uint64_t identifier); + + ~Menu() override = default; + + const MenuDelegateSP &GetDelegate() const { return m_delegate_sp; } + + void SetDelegate(const MenuDelegateSP &delegate_sp) { 
+ m_delegate_sp = delegate_sp; + } + + void RecalculateNameLengths(); + + void AddSubmenu(const MenuSP &menu_sp); + + int DrawAndRunMenu(Window &window); + + void DrawMenuTitle(Window &window, bool highlight); + + bool WindowDelegateDraw(Window &window, bool force) override; + + HandleCharResult WindowDelegateHandleChar(Window &window, int key) override; + + MenuActionResult ActionPrivate(Menu &menu) { + MenuActionResult result = MenuActionResult::NotHandled; + if (m_delegate_sp) { + result = m_delegate_sp->MenuDelegateAction(menu); + if (result != MenuActionResult::NotHandled) + return result; + } else if (m_parent) { + result = m_parent->ActionPrivate(menu); + if (result != MenuActionResult::NotHandled) + return result; + } + return m_canned_result; + } + + MenuActionResult Action() { + // Call the recursive action so it can try to handle it with the menu + // delegate, and if not, try our parent menu + return ActionPrivate(*this); + } + + void SetCannedResult(MenuActionResult result) { m_canned_result = result; } + + Menus &GetSubmenus() { return m_submenus; } + + const Menus &GetSubmenus() const { return m_submenus; } + + int GetSelectedSubmenuIndex() const { return m_selected; } + + void SetSelectedSubmenuIndex(int idx) { m_selected = idx; } + + Type GetType() const { return m_type; } + + int GetStartingColumn() const { return m_start_col; } + + void SetStartingColumn(int col) { m_start_col = col; } + + int GetKeyValue() const { return m_key_value; } + + std::string &GetName() { return m_name; } + + int GetDrawWidth() const { + return m_max_submenu_name_length + m_max_submenu_key_name_length + 8; + } + + uint64_t GetIdentifier() const { return m_identifier; } + + void SetIdentifier(uint64_t identifier) { m_identifier = identifier; } + +protected: + std::string m_name; + std::string m_key_name; + uint64_t m_identifier; + Type m_type; + int m_key_value; + int m_start_col; + int m_max_submenu_name_length; + int m_max_submenu_key_name_length; + int m_selected; + Menu *m_parent; + Menus m_submenus; + WindowSP m_menu_window_sp; + MenuActionResult m_canned_result; + MenuDelegateSP m_delegate_sp; +}; + +// Menubar or separator constructor +Menu::Menu(Type type) + : m_name(), m_key_name(), m_identifier(0), m_type(type), m_key_value(0), + m_start_col(0), m_max_submenu_name_length(0), + m_max_submenu_key_name_length(0), m_selected(0), m_parent(nullptr), + m_submenus(), m_canned_result(MenuActionResult::NotHandled), + m_delegate_sp() {} + +// Menuitem constructor +Menu::Menu(const char *name, const char *key_name, int key_value, + uint64_t identifier) + : m_name(), m_key_name(), m_identifier(identifier), m_type(Type::Invalid), + m_key_value(key_value), m_start_col(0), m_max_submenu_name_length(0), + m_max_submenu_key_name_length(0), m_selected(0), m_parent(nullptr), + m_submenus(), m_canned_result(MenuActionResult::NotHandled), + m_delegate_sp() { + if (name && name[0]) { + m_name = name; + m_type = Type::Item; + if (key_name && key_name[0]) + m_key_name = key_name; + } else { + m_type = Type::Separator; + } +} + +void Menu::RecalculateNameLengths() { + m_max_submenu_name_length = 0; + m_max_submenu_key_name_length = 0; + Menus &submenus = GetSubmenus(); + const size_t num_submenus = submenus.size(); + for (size_t i = 0; i < num_submenus; ++i) { + Menu *submenu = submenus[i].get(); + if (static_cast(m_max_submenu_name_length) < submenu->m_name.size()) + m_max_submenu_name_length = submenu->m_name.size(); + if (static_cast(m_max_submenu_key_name_length) < + submenu->m_key_name.size()) + 
+void Menu::RecalculateNameLengths() {
+  m_max_submenu_name_length = 0;
+  m_max_submenu_key_name_length = 0;
+  Menus &submenus = GetSubmenus();
+  const size_t num_submenus = submenus.size();
+  for (size_t i = 0; i < num_submenus; ++i) {
+    Menu *submenu = submenus[i].get();
+    if (static_cast<size_t>(m_max_submenu_name_length) <
+        submenu->m_name.size())
+      m_max_submenu_name_length = submenu->m_name.size();
+    if (static_cast<size_t>(m_max_submenu_key_name_length) <
+        submenu->m_key_name.size())
+      m_max_submenu_key_name_length = submenu->m_key_name.size();
+  }
+}
+
+void Menu::AddSubmenu(const MenuSP &menu_sp) {
+  menu_sp->m_parent = this;
+  if (static_cast<size_t>(m_max_submenu_name_length) < menu_sp->m_name.size())
+    m_max_submenu_name_length = menu_sp->m_name.size();
+  if (static_cast<size_t>(m_max_submenu_key_name_length) <
+      menu_sp->m_key_name.size())
+    m_max_submenu_key_name_length = menu_sp->m_key_name.size();
+  m_submenus.push_back(menu_sp);
+}
+
+void Menu::DrawMenuTitle(Window &window, bool highlight) {
+  if (m_type == Type::Separator) {
+    window.MoveCursor(0, window.GetCursorY());
+    window.PutChar(ACS_LTEE);
+    int width = window.GetWidth();
+    if (width > 2) {
+      width -= 2;
+      for (int i = 0; i < width; ++i)
+        window.PutChar(ACS_HLINE);
+    }
+    window.PutChar(ACS_RTEE);
+  } else {
+    const int shortcut_key = m_key_value;
+    bool underlined_shortcut = false;
+    const attr_t highlight_attr = A_REVERSE;
+    if (highlight)
+      window.AttributeOn(highlight_attr);
+    if (isprint(shortcut_key)) {
+      size_t lower_pos = m_name.find(tolower(shortcut_key));
+      size_t upper_pos = m_name.find(toupper(shortcut_key));
+      const char *name = m_name.c_str();
+      size_t pos = std::min(lower_pos, upper_pos);
+      if (pos != std::string::npos) {
+        underlined_shortcut = true;
+        if (pos > 0) {
+          window.PutCString(name, pos);
+          name += pos;
+        }
+        const attr_t shortcut_attr = A_UNDERLINE | A_BOLD;
+        window.AttributeOn(shortcut_attr);
+        window.PutChar(name[0]);
+        window.AttributeOff(shortcut_attr);
+        name++;
+        if (name[0])
+          window.PutCString(name);
+      }
+    }
+
+    if (!underlined_shortcut) {
+      window.PutCString(m_name.c_str());
+    }
+
+    if (highlight)
+      window.AttributeOff(highlight_attr);
+
+    if (m_key_name.empty()) {
+      if (!underlined_shortcut && isprint(m_key_value)) {
+        window.AttributeOn(COLOR_PAIR(3));
+        window.Printf(" (%c)", m_key_value);
+        window.AttributeOff(COLOR_PAIR(3));
+      }
+    } else {
+      window.AttributeOn(COLOR_PAIR(3));
+      window.Printf(" (%s)", m_key_name.c_str());
+      window.AttributeOff(COLOR_PAIR(3));
+    }
+  }
+}
+
+bool Menu::WindowDelegateDraw(Window &window, bool force) {
+  Menus &submenus = GetSubmenus();
+  const size_t num_submenus = submenus.size();
+  const int selected_idx = GetSelectedSubmenuIndex();
+  Menu::Type menu_type = GetType();
+  switch (menu_type) {
+  case Menu::Type::Bar: {
+    window.SetBackground(2);
+    window.MoveCursor(0, 0);
+    for (size_t i = 0; i < num_submenus; ++i) {
+      Menu *menu = submenus[i].get();
+      if (i > 0)
+        window.PutChar(' ');
+      menu->SetStartingColumn(window.GetCursorX());
+      window.PutCString("| ");
+      menu->DrawMenuTitle(window, false);
+    }
+    window.PutCString(" |");
+  } break;
+
+  case Menu::Type::Item: {
+    int y = 1;
+    int x = 3;
+    // Draw the menu
+    int cursor_x = 0;
+    int cursor_y = 0;
+    window.Erase();
+    window.SetBackground(2);
+    window.Box();
+    for (size_t i = 0; i < num_submenus; ++i) {
+      const bool is_selected = (i == static_cast<size_t>(selected_idx));
+      window.MoveCursor(x, y + i);
+      if (is_selected) {
+        // Remember where we want the cursor to be
+        cursor_x = x - 1;
+        cursor_y = y + i;
+      }
+      submenus[i]->DrawMenuTitle(window, is_selected);
+    }
+    window.MoveCursor(cursor_x, cursor_y);
+  } break;
+
+  default:
+  case Menu::Type::Separator:
+    break;
+  }
+  return true; // Drawing handled...
+}
+
+HandleCharResult Menu::WindowDelegateHandleChar(Window &window, int key) {
+  HandleCharResult result = eKeyNotHandled;
+
+  Menus &submenus = GetSubmenus();
+  const size_t num_submenus = submenus.size();
+  const int selected_idx = GetSelectedSubmenuIndex();
+  Menu::Type menu_type = GetType();
+  if (menu_type == Menu::Type::Bar) {
+    MenuSP run_menu_sp;
+    switch (key) {
+    case KEY_DOWN:
+    case KEY_UP:
+      // Show last menu or first menu
+      if (selected_idx < static_cast<int>(num_submenus))
+        run_menu_sp = submenus[selected_idx];
+      else if (!submenus.empty())
+        run_menu_sp = submenus.front();
+      result = eKeyHandled;
+      break;
+
+    case KEY_RIGHT:
+      ++m_selected;
+      if (m_selected >= static_cast<int>(num_submenus))
+        m_selected = 0;
+      if (m_selected < static_cast<int>(num_submenus))
+        run_menu_sp = submenus[m_selected];
+      else if (!submenus.empty())
+        run_menu_sp = submenus.front();
+      result = eKeyHandled;
+      break;
+
+    case KEY_LEFT:
+      --m_selected;
+      if (m_selected < 0)
+        m_selected = num_submenus - 1;
+      if (m_selected < static_cast<int>(num_submenus))
+        run_menu_sp = submenus[m_selected];
+      else if (!submenus.empty())
+        run_menu_sp = submenus.front();
+      result = eKeyHandled;
+      break;
+
+    default:
+      for (size_t i = 0; i < num_submenus; ++i) {
+        if (submenus[i]->GetKeyValue() == key) {
+          SetSelectedSubmenuIndex(i);
+          run_menu_sp = submenus[i];
+          result = eKeyHandled;
+          break;
+        }
+      }
+      break;
+    }
+
+    if (run_menu_sp) {
+      // Run the action on this menu in case we need to populate the menu
+      // with dynamic content, and in case check marks or any other menu
+      // decorations need to be recalculated
+      if (run_menu_sp->Action() == MenuActionResult::Quit)
+        return eQuitApplication;
+
+      Rect menu_bounds;
+      menu_bounds.origin.x = run_menu_sp->GetStartingColumn();
+      menu_bounds.origin.y = 1;
+      menu_bounds.size.width = run_menu_sp->GetDrawWidth();
+      menu_bounds.size.height = run_menu_sp->GetSubmenus().size() + 2;
+      if (m_menu_window_sp)
+        window.GetParent()->RemoveSubWindow(m_menu_window_sp.get());
+
+      m_menu_window_sp = window.GetParent()->CreateSubWindow(
+          run_menu_sp->GetName().c_str(), menu_bounds, true);
+      m_menu_window_sp->SetDelegate(run_menu_sp);
+    }
+  } else if (menu_type == Menu::Type::Item) {
+    switch (key) {
+    case KEY_DOWN:
+      if (m_submenus.size() > 1) {
+        const int start_select = m_selected;
+        while (++m_selected != start_select) {
+          if (static_cast<size_t>(m_selected) >= num_submenus)
+            m_selected = 0;
+          if (m_submenus[m_selected]->GetType() == Type::Separator)
+            continue;
+          else
+            break;
+        }
+        return eKeyHandled;
+      }
+      break;
+
+    case KEY_UP:
+      if (m_submenus.size() > 1) {
+        const int start_select = m_selected;
+        while (--m_selected != start_select) {
+          if (m_selected < 0)
+            m_selected = num_submenus - 1;
+          if (m_submenus[m_selected]->GetType() == Type::Separator)
+            continue;
+          else
+            break;
+        }
+        return eKeyHandled;
+      }
+      break;
+
+    case KEY_RETURN:
+      if (static_cast<size_t>(selected_idx) < num_submenus) {
+        if (submenus[selected_idx]->Action() == MenuActionResult::Quit)
+          return eQuitApplication;
+        window.GetParent()->RemoveSubWindow(&window);
+        return eKeyHandled;
+      }
+      break;
+
+    case KEY_ESCAPE: // Beware: pressing the escape key incurs a 1-2 second
+                     // delay while curses waits for any further characters
+                     // of an escape sequence
+      window.GetParent()->RemoveSubWindow(&window);
+      return eKeyHandled;
+
+    default:
+      for (size_t i = 0; i < num_submenus; ++i) {
+        Menu *menu = submenus[i].get();
+        if (menu->GetKeyValue() == key) {
+          SetSelectedSubmenuIndex(i);
+          window.GetParent()->RemoveSubWindow(&window);
+          if (menu->Action() == MenuActionResult::Quit)
+            return eQuitApplication;
+          return eKeyHandled;
+        }
+      }
+      break;
+    }
+  } else if (menu_type == Menu::Type::Separator) {
+  }
+  return result;
+}
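+
+// Application owns the curses SCREEN and the top-level window and drives the
+// whole GUI from one loop in Run(): redraw if anything changed, poll for a
+// single key with a timeout, and use the timeouts to service asynchronous
+// debugger events.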
+class Application {
+public:
+  Application(FILE *in, FILE *out)
+      : m_window_sp(), m_screen(nullptr), m_in(in), m_out(out) {}
+
+  ~Application() {
+    m_window_delegates.clear();
+    m_window_sp.reset();
+    if (m_screen) {
+      ::delscreen(m_screen);
+      m_screen = nullptr;
+    }
+  }
+
+  void Initialize() {
+    ::setlocale(LC_ALL, "");
+    ::setlocale(LC_CTYPE, "");
+    m_screen = ::newterm(nullptr, m_out, m_in);
+    ::start_color();
+    ::curs_set(0);
+    ::noecho();
+    ::keypad(stdscr, TRUE);
+  }
+
+  void Terminate() { ::endwin(); }
+
+  void Run(Debugger &debugger) {
+    bool done = false;
+    int delay_in_tenths_of_a_second = 1;
+
+    // Alas, the threading model in curses is a bit lame, so we need to
+    // resort to polling every 0.1 seconds. We could poll for stdin ourselves
+    // and then pass the keys down, but then we would need to translate all
+    // of the escape sequences ourselves. So we resort to polling for input
+    // because we need to receive async process events while in this loop.
+
+    halfdelay(delay_in_tenths_of_a_second); // Poll using some number of
+                                            // tenths of a second when
+                                            // calling Window::GetChar()
+
+    ListenerSP listener_sp(
+        Listener::MakeListener("lldb.IOHandler.curses.Application"));
+    ConstString broadcaster_class_target(Target::GetStaticBroadcasterClass());
+    ConstString broadcaster_class_process(
+        Process::GetStaticBroadcasterClass());
+    ConstString broadcaster_class_thread(Thread::GetStaticBroadcasterClass());
+    debugger.EnableForwardEvents(listener_sp);
+
+    bool update = true;
+#if defined(__APPLE__)
+    std::deque<int> escape_chars;
+#endif
+
+    while (!done) {
+      if (update) {
+        m_window_sp->Draw(false);
+        // All windows should be calling Window::DeferredRefresh() instead of
+        // Window::Refresh() so we can do a single update and avoid any
+        // screen blinking
+        update_panels();
+
+        // Cursor hiding isn't working on MacOSX, so hide it in the top left
+        // corner
+        m_window_sp->MoveCursor(0, 0);
+
+        doupdate();
+        update = false;
+      }
+
+#if defined(__APPLE__)
+      // Terminal.app doesn't map its function keys correctly, F1-F4 default
+      // to: \033OP, \033OQ, \033OR, \033OS, so let's take care of this here
+      // if possible
+      int ch;
+      if (escape_chars.empty())
+        ch = m_window_sp->GetChar();
+      else {
+        ch = escape_chars.front();
+        escape_chars.pop_front();
+      }
+      if (ch == KEY_ESCAPE) {
+        int ch2 = m_window_sp->GetChar();
+        if (ch2 == 'O') {
+          int ch3 = m_window_sp->GetChar();
+          switch (ch3) {
+          case 'P':
+            ch = KEY_F(1);
+            break;
+          case 'Q':
+            ch = KEY_F(2);
+            break;
+          case 'R':
+            ch = KEY_F(3);
+            break;
+          case 'S':
+            ch = KEY_F(4);
+            break;
+          default:
+            escape_chars.push_back(ch2);
+            if (ch3 != -1)
+              escape_chars.push_back(ch3);
+            break;
+          }
+        } else if (ch2 != -1)
+          escape_chars.push_back(ch2);
+      }
+#else
+      int ch = m_window_sp->GetChar();
+
+#endif
+      if (ch == -1) {
+        if (feof(m_in) || ferror(m_in)) {
+          done = true;
+        } else {
+          // Just a timeout from using halfdelay(), check for events
+          EventSP event_sp;
+          while (listener_sp->PeekAtNextEvent()) {
+            listener_sp->GetEvent(event_sp, std::chrono::seconds(0));
+
+            if (event_sp) {
+              Broadcaster *broadcaster = event_sp->GetBroadcaster();
+              if (broadcaster) {
+                // uint32_t event_type = event_sp->GetType();
+                ConstString broadcaster_class(
+                    broadcaster->GetBroadcasterClass());
+                if (broadcaster_class == broadcaster_class_process) {
+                  debugger.GetCommandInterpreter().UpdateExecutionContext(
+                      nullptr);
+                  update = true;
+                  continue; // Don't get any key, just update our view
+                }
+              }
+            }
+          }
+        }
+      } else {
+        HandleCharResult key_result = m_window_sp->HandleChar(ch);
+        switch (key_result) {
+        case eKeyHandled:
+          debugger.GetCommandInterpreter().UpdateExecutionContext(nullptr);
+          update = true;
+          break;
+        case eKeyNotHandled:
+          break;
+        case eQuitApplication:
+          done = true;
+          break;
+        }
+      }
+    }
+
+    debugger.CancelForwardEvents(listener_sp);
+  }
+
+  WindowSP &GetMainWindow() {
+    if (!m_window_sp)
+      m_window_sp = std::make_shared<Window>("main", stdscr, false);
+    return m_window_sp;
+  }
+
+protected:
+  WindowSP m_window_sp;
+  WindowDelegates m_window_delegates;
+  SCREEN *m_screen;
+  FILE *m_in;
+  FILE *m_out;
+};
+
+} // namespace curses
+
+using namespace curses;
+
+struct Row {
+  ValueObjectManager value;
+  Row *parent;
+  // The process stop ID when the children were calculated.
+  uint32_t children_stop_id = 0;
+  int row_idx;
+  int x;
+  int y;
+  bool might_have_children;
+  bool expanded;
+  bool calculated_children;
+  std::vector<Row> children;
+
+  Row(const ValueObjectSP &v, Row *p)
+      : value(v, lldb::eDynamicDontRunTarget, true), parent(p), row_idx(0),
+        x(1), y(1), might_have_children(v ? v->MightHaveChildren() : false),
+        expanded(false), calculated_children(false), children() {}
+
+  size_t GetDepth() const {
+    if (parent)
+      return 1 + parent->GetDepth();
+    return 0;
+  }
+
+  void Expand() { expanded = true; }
+
+  std::vector<Row> &GetChildren() {
+    ProcessSP process_sp = value.GetProcessSP();
+    if (process_sp) {
+      // Check the process before dereferencing it: recompute the children
+      // if the process has stopped again since they were last calculated.
+      auto stop_id = process_sp->GetStopID();
+      if (stop_id != children_stop_id) {
+        children_stop_id = stop_id;
+        calculated_children = false;
+      }
+    }
+    if (!calculated_children) {
+      children.clear();
+      calculated_children = true;
+      ValueObjectSP valobj = value.GetSP();
+      if (valobj) {
+        const size_t num_children = valobj->GetNumChildren();
+        for (size_t i = 0; i < num_children; ++i) {
+          children.push_back(Row(valobj->GetChildAtIndex(i, true), this));
+        }
+      }
+    }
+    return children;
+  }
+
+  void Unexpand() {
+    expanded = false;
+    calculated_children = false;
+    children.clear();
+  }
+
+  void DrawTree(Window &window) {
+    if (parent)
+      parent->DrawTreeForChild(window, this, 0);
+
+    if (might_have_children) {
+      // If we can get UTF8 characters to work we should try to use the
+      // "symbol" UTF8 string below
+      // const char *symbol = "";
+      // if (row.expanded)
+      //   symbol = "\xe2\x96\xbd ";
+      // else
+      //   symbol = "\xe2\x96\xb7 ";
+      // window.PutCString (symbol);
+
+      // The ACS_DARROW and ACS_RARROW don't look very nice; they are just a
+      // 'v' or '>' character...
+      // if (expanded)
+      //   window.PutChar (ACS_DARROW);
+      // else
+      //   window.PutChar (ACS_RARROW);
+      // Since we can't find any good looking right arrow/down arrow
+      // symbols, just use a diamond...
+      window.PutChar(ACS_DIAMOND);
+      window.PutChar(ACS_HLINE);
+    }
+  }
+
+  void DrawTreeForChild(Window &window, Row *child, uint32_t reverse_depth) {
+    if (parent)
+      parent->DrawTreeForChild(window, this, reverse_depth + 1);
+
+    if (&GetChildren().back() == child) {
+      // Last child
+      if (reverse_depth == 0) {
+        window.PutChar(ACS_LLCORNER);
+        window.PutChar(ACS_HLINE);
+      } else {
+        window.PutChar(' ');
+        window.PutChar(' ');
+      }
+    } else {
+      if (reverse_depth == 0) {
+        window.PutChar(ACS_LTEE);
+        window.PutChar(ACS_HLINE);
+      } else {
+        window.PutChar(ACS_VLINE);
+        window.PutChar(' ');
+      }
+    }
+  }
+};
+
+struct DisplayOptions {
+  bool show_types;
+};
+
+class TreeItem;
+
+class TreeDelegate {
+public:
+  TreeDelegate() = default;
+  virtual ~TreeDelegate() = default;
+
+  virtual void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) = 0;
+  virtual void TreeDelegateGenerateChildren(TreeItem &item) = 0;
+  virtual bool TreeDelegateItemSelected(
+      TreeItem &item) = 0; // Return true if we need to update views
+};
+
+typedef std::shared_ptr<TreeDelegate> TreeDelegateSP;
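+
+// TreeItem is the generic node used by the tree windows below. It owns the
+// parent/child links plus the expansion and row bookkeeping, while all
+// type-specific behavior (drawing an item, generating its children, reacting
+// to selection) is deferred to the TreeDelegate it was created with.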
+class TreeItem {
+public:
+  TreeItem(TreeItem *parent, TreeDelegate &delegate, bool might_have_children)
+      : m_parent(parent), m_delegate(delegate), m_user_data(nullptr),
+        m_identifier(0), m_row_idx(-1), m_children(),
+        m_might_have_children(might_have_children), m_is_expanded(false) {}
+
+  TreeItem &operator=(const TreeItem &rhs) {
+    if (this != &rhs) {
+      m_parent = rhs.m_parent;
+      m_delegate = rhs.m_delegate;
+      m_user_data = rhs.m_user_data;
+      m_identifier = rhs.m_identifier;
+      m_row_idx = rhs.m_row_idx;
+      m_children = rhs.m_children;
+      m_might_have_children = rhs.m_might_have_children;
+      m_is_expanded = rhs.m_is_expanded;
+    }
+    return *this;
+  }
+
+  TreeItem(const TreeItem &) = default;
+
+  size_t GetDepth() const {
+    if (m_parent)
+      return 1 + m_parent->GetDepth();
+    return 0;
+  }
+
+  int GetRowIndex() const { return m_row_idx; }
+
+  void ClearChildren() { m_children.clear(); }
+
+  void Resize(size_t n, const TreeItem &t) { m_children.resize(n, t); }
+
+  TreeItem &operator[](size_t i) { return m_children[i]; }
+
+  void SetRowIndex(int row_idx) { m_row_idx = row_idx; }
+
+  size_t GetNumChildren() {
+    m_delegate.TreeDelegateGenerateChildren(*this);
+    return m_children.size();
+  }
+
+  void ItemWasSelected() { m_delegate.TreeDelegateItemSelected(*this); }
+
+  void CalculateRowIndexes(int &row_idx) {
+    SetRowIndex(row_idx);
+    ++row_idx;
+
+    const bool expanded = IsExpanded();
+
+    // The root item must calculate its children, or we must calculate the
+    // number of children if the item is expanded
+    if (m_parent == nullptr || expanded)
+      GetNumChildren();
+
+    for (auto &item : m_children) {
+      if (expanded)
+        item.CalculateRowIndexes(row_idx);
+      else
+        item.SetRowIndex(-1);
+    }
+  }
+
+  TreeItem *GetParent() { return m_parent; }
+
+  bool IsExpanded() const { return m_is_expanded; }
+
+  void Expand() { m_is_expanded = true; }
+
+  void Unexpand() { m_is_expanded = false; }
+
+  bool Draw(Window &window, const int first_visible_row,
+            const uint32_t selected_row_idx, int &row_idx,
+            int &num_rows_left) {
+    if (num_rows_left <= 0)
+      return false;
+
+    if (m_row_idx >= first_visible_row) {
+      window.MoveCursor(2, row_idx + 1);
+
+      if (m_parent)
+        m_parent->DrawTreeForChild(window, this, 0);
+
+      if (m_might_have_children) {
+        // If we can get UTF8 characters to work we should try to use the
+        // "symbol" UTF8 string below
+        // const char *symbol = "";
+        // if (row.expanded)
+        //   symbol = "\xe2\x96\xbd ";
+        // else
+        //   symbol = "\xe2\x96\xb7 ";
+        // window.PutCString (symbol);
+
+        // The ACS_DARROW and ACS_RARROW don't look very nice; they are just
+        // a 'v' or '>' character...
+        // if (expanded)
+        //   window.PutChar (ACS_DARROW);
+        // else
+        //   window.PutChar (ACS_RARROW);
+        // Since we can't find any good looking right arrow/down arrow
+        // symbols, just use a diamond...
+        window.PutChar(ACS_DIAMOND);
+        window.PutChar(ACS_HLINE);
+      }
+      bool highlight =
+          (selected_row_idx == static_cast<uint32_t>(m_row_idx)) &&
+          window.IsActive();
+
+      if (highlight)
+        window.AttributeOn(A_REVERSE);
+
+      m_delegate.TreeDelegateDrawTreeItem(*this, window);
+
+      if (highlight)
+        window.AttributeOff(A_REVERSE);
+      ++row_idx;
+      --num_rows_left;
+    }
+
+    if (num_rows_left <= 0)
+      return false; // We are done drawing...
+
+    if (IsExpanded()) {
+      for (auto &item : m_children) {
+        // If we displayed all the rows and item.Draw() returns false we are
+        // done drawing and can exit this for loop
+        if (!item.Draw(window, first_visible_row, selected_row_idx, row_idx,
+                       num_rows_left))
+          break;
+      }
+    }
+    return num_rows_left >= 0; // Return true if not done drawing yet
+  }
+
+  void DrawTreeForChild(Window &window, TreeItem *child,
+                        uint32_t reverse_depth) {
+    if (m_parent)
+      m_parent->DrawTreeForChild(window, this, reverse_depth + 1);
+
+    if (&m_children.back() == child) {
+      // Last child
+      if (reverse_depth == 0) {
+        window.PutChar(ACS_LLCORNER);
+        window.PutChar(ACS_HLINE);
+      } else {
+        window.PutChar(' ');
+        window.PutChar(' ');
+      }
+    } else {
+      if (reverse_depth == 0) {
+        window.PutChar(ACS_LTEE);
+        window.PutChar(ACS_HLINE);
+      } else {
+        window.PutChar(ACS_VLINE);
+        window.PutChar(' ');
+      }
+    }
+  }
+
+  TreeItem *GetItemForRowIndex(uint32_t row_idx) {
+    if (static_cast<uint32_t>(m_row_idx) == row_idx)
+      return this;
+    if (m_children.empty())
+      return nullptr;
+    if (IsExpanded()) {
+      for (auto &item : m_children) {
+        TreeItem *selected_item_ptr = item.GetItemForRowIndex(row_idx);
+        if (selected_item_ptr)
+          return selected_item_ptr;
+      }
+    }
+    return nullptr;
+  }
+
+  void *GetUserData() const { return m_user_data; }
+
+  void SetUserData(void *user_data) { m_user_data = user_data; }
+
+  uint64_t GetIdentifier() const { return m_identifier; }
+
+  void SetIdentifier(uint64_t identifier) { m_identifier = identifier; }
+
+  void SetMightHaveChildren(bool b) { m_might_have_children = b; }
+
+protected:
+  TreeItem *m_parent;
+  TreeDelegate &m_delegate;
+  void *m_user_data;
+  uint64_t m_identifier;
+  int m_row_idx; // Zero based visible row index, -1 if not visible or for
+                 // the root item
+  std::vector<TreeItem> m_children;
+  bool m_might_have_children;
+  bool m_is_expanded;
+};
+
+class TreeWindowDelegate : public WindowDelegate {
+public:
+  TreeWindowDelegate(Debugger &debugger, const TreeDelegateSP &delegate_sp)
+      : m_debugger(debugger), m_delegate_sp(delegate_sp),
+        m_root(nullptr, *delegate_sp, true), m_selected_item(nullptr),
+        m_num_rows(0), m_selected_row_idx(0), m_first_visible_row(0),
+        m_min_x(0), m_min_y(0), m_max_x(0), m_max_y(0) {}
+
+  int NumVisibleRows() const { return m_max_y - m_min_y; }
+
+  bool WindowDelegateDraw(Window &window, bool force) override {
+    ExecutionContext exe_ctx(
+        m_debugger.GetCommandInterpreter().GetExecutionContext());
+    Process *process = exe_ctx.GetProcessPtr();
+
+    bool display_content = false;
+    if (process) {
+      StateType state = process->GetState();
+      if (StateIsStoppedState(state, true)) {
+        // We are stopped, so it is ok to display the content
+        display_content = true;
+      } else if (StateIsRunningState(state)) {
+        return true; // Don't do any updating when we are running
+      }
+    }
+
+    m_min_x = 2;
+    m_min_y = 1;
+    m_max_x = window.GetWidth() - 1;
+    m_max_y = window.GetHeight() - 1;
+
+    window.Erase();
+    window.DrawTitleBox(window.GetName());
+
+    if (display_content) {
+      const int num_visible_rows = NumVisibleRows();
+      m_num_rows = 0;
+      m_root.CalculateRowIndexes(m_num_rows);
+
+      // If we unexpanded while having something selected, and our total
+      // number of rows is now less than the number of visible rows, make
+      // sure we show all the rows by setting the first visible row
+      // accordingly.
+      if (m_first_visible_row > 0 && m_num_rows < num_visible_rows)
+        m_first_visible_row = 0;
+
+      // Make sure the selected row is always visible
+      if (m_selected_row_idx < m_first_visible_row)
+        m_first_visible_row = m_selected_row_idx;
+      else if (m_first_visible_row + num_visible_rows <= m_selected_row_idx)
+        m_first_visible_row = m_selected_row_idx - num_visible_rows + 1;
+
+      int row_idx = 0;
+      int num_rows_left = num_visible_rows;
+      m_root.Draw(window, m_first_visible_row, m_selected_row_idx, row_idx,
+                  num_rows_left);
+      // Get the selected row
+      m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx);
+    } else {
+      m_selected_item = nullptr;
+    }
+
+    return true; // Drawing handled
+  }
+
+  const char *WindowDelegateGetHelpText() override {
+    return "Thread window keyboard shortcuts:";
+  }
+
+  KeyHelp *WindowDelegateGetKeyHelp() override {
+    static curses::KeyHelp g_source_view_key_help[] = {
+        {KEY_UP, "Select previous item"},
+        {KEY_DOWN, "Select next item"},
+        {KEY_RIGHT, "Expand the selected item"},
+        {KEY_LEFT,
+         "Unexpand the selected item or select parent if not expanded"},
+        {KEY_PPAGE, "Page up"},
+        {KEY_NPAGE, "Page down"},
+        {'h', "Show help dialog"},
+        {' ', "Toggle item expansion"},
+        {',', "Page up"},
+        {'.', "Page down"},
+        {'\0', nullptr}};
+    return g_source_view_key_help;
+  }
+
+  HandleCharResult WindowDelegateHandleChar(Window &window, int c) override {
+    switch (c) {
+    case ',':
+    case KEY_PPAGE:
+      // Page up key
+      if (m_first_visible_row > 0) {
+        if (m_first_visible_row > m_max_y)
+          m_first_visible_row -= m_max_y;
+        else
+          m_first_visible_row = 0;
+        m_selected_row_idx = m_first_visible_row;
+        m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx);
+        if (m_selected_item)
+          m_selected_item->ItemWasSelected();
+      }
+      return eKeyHandled;
+
+    case '.':
+    case KEY_NPAGE:
+      // Page down key
+      if (m_num_rows > m_max_y) {
+        if (m_first_visible_row + m_max_y < m_num_rows) {
+          m_first_visible_row += m_max_y;
+          m_selected_row_idx = m_first_visible_row;
+          m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx);
+          if (m_selected_item)
+            m_selected_item->ItemWasSelected();
+        }
+      }
+      return eKeyHandled;
+
+    case KEY_UP:
+      if (m_selected_row_idx > 0) {
+        --m_selected_row_idx;
+        m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx);
+        if (m_selected_item)
+          m_selected_item->ItemWasSelected();
+      }
+      return eKeyHandled;
+
+    case KEY_DOWN:
+      if (m_selected_row_idx + 1 < m_num_rows) {
+        ++m_selected_row_idx;
+        m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx);
+        if (m_selected_item)
+          m_selected_item->ItemWasSelected();
+      }
+      return eKeyHandled;
+
+    case KEY_RIGHT:
+      if (m_selected_item) {
+        if (!m_selected_item->IsExpanded())
+          m_selected_item->Expand();
+      }
+      return eKeyHandled;
+
+    case KEY_LEFT:
+      if (m_selected_item) {
+        if (m_selected_item->IsExpanded())
+          m_selected_item->Unexpand();
+        else if (m_selected_item->GetParent()) {
+          m_selected_row_idx = m_selected_item->GetParent()->GetRowIndex();
+          m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx);
+          if (m_selected_item)
+            m_selected_item->ItemWasSelected();
+        }
+      }
+      return eKeyHandled;
+
+    case ' ':
+      // Toggle expansion state when SPACE is pressed
+      if (m_selected_item) {
+        if (m_selected_item->IsExpanded())
+          m_selected_item->Unexpand();
+        else
+          m_selected_item->Expand();
+      }
+      return eKeyHandled;
+
+    case 'h':
+      window.CreateHelpSubwindow();
+      return eKeyHandled;
+
+    default:
+      break;
+    }
+    return eKeyNotHandled;
+  }
+
+protected:
+  Debugger &m_debugger;
+  TreeDelegateSP m_delegate_sp;
+  TreeItem m_root;
+  TreeItem *m_selected_item;
+  int m_num_rows;
+  int m_selected_row_idx;
+  int m_first_visible_row;
+  int m_min_x;
+  int m_min_y;
+  int m_max_x;
+  int m_max_y;
+};
+
+class FrameTreeDelegate : public TreeDelegate {
+public:
+  FrameTreeDelegate() : TreeDelegate() {
+    FormatEntity::Parse(
+        "frame #${frame.index}: {${function.name}${function.pc-offset}}}",
+        m_format);
+  }
+
+  ~FrameTreeDelegate() override = default;
+
+  void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) override {
+    Thread *thread = (Thread *)item.GetUserData();
+    if (thread) {
+      const uint64_t frame_idx = item.GetIdentifier();
+      StackFrameSP frame_sp = thread->GetStackFrameAtIndex(frame_idx);
+      if (frame_sp) {
+        StreamString strm;
+        const SymbolContext &sc =
+            frame_sp->GetSymbolContext(eSymbolContextEverything);
+        ExecutionContext exe_ctx(frame_sp);
+        if (FormatEntity::Format(m_format, strm, &sc, &exe_ctx, nullptr,
+                                 nullptr, false, false)) {
+          int right_pad = 1;
+          window.PutCStringTruncated(strm.GetString().str().c_str(),
+                                     right_pad);
+        }
+      }
+    }
+  }
+
+  void TreeDelegateGenerateChildren(TreeItem &item) override {
+    // No children for frames yet...
+  }
+
+  bool TreeDelegateItemSelected(TreeItem &item) override {
+    Thread *thread = (Thread *)item.GetUserData();
+    if (thread) {
+      thread->GetProcess()->GetThreadList().SetSelectedThreadByID(
+          thread->GetID());
+      const uint64_t frame_idx = item.GetIdentifier();
+      thread->SetSelectedFrameByIndex(frame_idx);
+      return true;
+    }
+    return false;
+  }
+
+protected:
+  FormatEntity::Entry m_format;
+};
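+
+// The threads view composes three delegates: ThreadsTreeDelegate (below)
+// draws the process root item and creates one child per thread,
+// ThreadTreeDelegate draws each thread and creates one child per stack
+// frame, and FrameTreeDelegate (above) draws the individual frames.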
+class ThreadTreeDelegate : public TreeDelegate {
+public:
+  ThreadTreeDelegate(Debugger &debugger)
+      : TreeDelegate(), m_debugger(debugger), m_tid(LLDB_INVALID_THREAD_ID),
+        m_stop_id(UINT32_MAX) {
+    FormatEntity::Parse("thread #${thread.index}: tid = ${thread.id}{, stop "
+                        "reason = ${thread.stop-reason}}",
+                        m_format);
+  }
+
+  ~ThreadTreeDelegate() override = default;
+
+  ProcessSP GetProcess() {
+    return m_debugger.GetCommandInterpreter()
+        .GetExecutionContext()
+        .GetProcessSP();
+  }
+
+  ThreadSP GetThread(const TreeItem &item) {
+    ProcessSP process_sp = GetProcess();
+    if (process_sp)
+      return process_sp->GetThreadList().FindThreadByID(item.GetIdentifier());
+    return ThreadSP();
+  }
+
+  void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) override {
+    ThreadSP thread_sp = GetThread(item);
+    if (thread_sp) {
+      StreamString strm;
+      ExecutionContext exe_ctx(thread_sp);
+      if (FormatEntity::Format(m_format, strm, nullptr, &exe_ctx, nullptr,
+                               nullptr, false, false)) {
+        int right_pad = 1;
+        window.PutCStringTruncated(strm.GetString().str().c_str(), right_pad);
+      }
+    }
+  }
+
+  void TreeDelegateGenerateChildren(TreeItem &item) override {
+    ProcessSP process_sp = GetProcess();
+    if (process_sp && process_sp->IsAlive()) {
+      StateType state = process_sp->GetState();
+      if (StateIsStoppedState(state, true)) {
+        ThreadSP thread_sp = GetThread(item);
+        if (thread_sp) {
+          if (m_stop_id == process_sp->GetStopID() &&
+              thread_sp->GetID() == m_tid)
+            return; // Children are already up to date
+          if (!m_frame_delegate_sp) {
+            // Always expand the thread item the first time we show it
+            m_frame_delegate_sp = std::make_shared<FrameTreeDelegate>();
+          }
+
+          m_stop_id = process_sp->GetStopID();
+          m_tid = thread_sp->GetID();
+
+          TreeItem t(&item, *m_frame_delegate_sp, false);
+          size_t num_frames = thread_sp->GetStackFrameCount();
+          item.Resize(num_frames, t);
+          for (size_t i = 0; i < num_frames; ++i) {
+            item[i].SetUserData(thread_sp.get());
+            item[i].SetIdentifier(i);
+          }
+        }
+        return;
+      }
+    }
+    item.ClearChildren();
+  }
+
+  bool TreeDelegateItemSelected(TreeItem &item) override {
+    ProcessSP process_sp = GetProcess();
+    if (process_sp && process_sp->IsAlive()) {
+      StateType state = process_sp->GetState();
+      if (StateIsStoppedState(state, true)) {
+        ThreadSP thread_sp = GetThread(item);
+        if (thread_sp) {
+          ThreadList &thread_list = thread_sp->GetProcess()->GetThreadList();
+          std::lock_guard<std::recursive_mutex> guard(thread_list.GetMutex());
+          ThreadSP selected_thread_sp = thread_list.GetSelectedThread();
+          if (selected_thread_sp->GetID() != thread_sp->GetID()) {
+            thread_list.SetSelectedThreadByID(thread_sp->GetID());
+            return true;
+          }
+        }
+      }
+    }
+    return false;
+  }
+
+protected:
+  Debugger &m_debugger;
+  std::shared_ptr<FrameTreeDelegate> m_frame_delegate_sp;
+  lldb::user_id_t m_tid;
+  uint32_t m_stop_id;
+  FormatEntity::Entry m_format;
+};
+
+class ThreadsTreeDelegate : public TreeDelegate {
+public:
+  ThreadsTreeDelegate(Debugger &debugger)
+      : TreeDelegate(), m_thread_delegate_sp(), m_debugger(debugger),
+        m_stop_id(UINT32_MAX) {
+    FormatEntity::Parse("process ${process.id}{, name = ${process.name}}",
+                        m_format);
+  }
+
+  ~ThreadsTreeDelegate() override = default;
+
+  ProcessSP GetProcess() {
+    return m_debugger.GetCommandInterpreter()
+        .GetExecutionContext()
+        .GetProcessSP();
+  }
+
+  void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) override {
+    ProcessSP process_sp = GetProcess();
+    if (process_sp && process_sp->IsAlive()) {
+      StreamString strm;
+      ExecutionContext exe_ctx(process_sp);
+      if (FormatEntity::Format(m_format, strm, nullptr, &exe_ctx, nullptr,
+                               nullptr, false, false)) {
+        int right_pad = 1;
+        window.PutCStringTruncated(strm.GetString().str().c_str(), right_pad);
+      }
+    }
+  }
+
+  void TreeDelegateGenerateChildren(TreeItem &item) override {
+    ProcessSP process_sp = GetProcess();
+    if (process_sp && process_sp->IsAlive()) {
+      StateType state = process_sp->GetState();
+      if (StateIsStoppedState(state, true)) {
+        const uint32_t stop_id = process_sp->GetStopID();
+        if (m_stop_id == stop_id)
+          return; // Children are already up to date
+
+        m_stop_id = stop_id;
+
+        if (!m_thread_delegate_sp) {
+          // Always expand the thread item the first time we show it
+          // item.Expand();
+          m_thread_delegate_sp =
+              std::make_shared<ThreadTreeDelegate>(m_debugger);
+        }
+
+        TreeItem t(&item, *m_thread_delegate_sp, false);
+        ThreadList &threads = process_sp->GetThreadList();
+        std::lock_guard<std::recursive_mutex> guard(threads.GetMutex());
+        size_t num_threads = threads.GetSize();
+        item.Resize(num_threads, t);
+        for (size_t i = 0; i < num_threads; ++i) {
+          item[i].SetIdentifier(threads.GetThreadAtIndex(i)->GetID());
+          item[i].SetMightHaveChildren(true);
+        }
+        return;
+      }
+    }
+    item.ClearChildren();
+  }
+
+  bool TreeDelegateItemSelected(TreeItem &item) override { return false; }
+
+protected:
+  std::shared_ptr<ThreadTreeDelegate> m_thread_delegate_sp;
+  Debugger &m_debugger;
+  uint32_t m_stop_id;
+  FormatEntity::Entry m_format;
+};
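+
+// ValueObjectListDelegate displays a list of value objects as expandable
+// rows and implements all of the navigation and formatting keys. The frame
+// variable and register windows below derive from it and only override
+// WindowDelegateDraw() to decide which values are shown.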
+class ValueObjectListDelegate : public WindowDelegate {
+public:
+  ValueObjectListDelegate()
+      : m_rows(), m_selected_row(nullptr), m_selected_row_idx(0),
+        m_first_visible_row(0), m_num_rows(0), m_max_x(0), m_max_y(0) {}
+
+  ValueObjectListDelegate(ValueObjectList &valobj_list)
+      : m_rows(), m_selected_row(nullptr), m_selected_row_idx(0),
+        m_first_visible_row(0), m_num_rows(0), m_max_x(0), m_max_y(0) {
+    SetValues(valobj_list);
+  }
+
+  ~ValueObjectListDelegate() override = default;
+
+  void SetValues(ValueObjectList &valobj_list) {
+    m_selected_row = nullptr;
+    m_selected_row_idx = 0;
+    m_first_visible_row = 0;
+    m_num_rows = 0;
+    m_rows.clear();
+    for (auto &valobj_sp : valobj_list.GetObjects())
+      m_rows.push_back(Row(valobj_sp, nullptr));
+  }
+
+  bool WindowDelegateDraw(Window &window, bool force) override {
+    m_num_rows = 0;
+    m_min_x = 2;
+    m_min_y = 1;
+    m_max_x = window.GetWidth() - 1;
+    m_max_y = window.GetHeight() - 1;
+
+    window.Erase();
+    window.DrawTitleBox(window.GetName());
+
+    const int num_visible_rows = NumVisibleRows();
+    const int num_rows = CalculateTotalNumberRows(m_rows);
+
+    // If we unexpanded while having something selected, and our total
+    // number of rows is now less than the number of visible rows, make sure
+    // we show all the rows by setting the first visible row accordingly.
+    if (m_first_visible_row > 0 && num_rows < num_visible_rows)
+      m_first_visible_row = 0;
+
+    // Make sure the selected row is always visible
+    if (m_selected_row_idx < m_first_visible_row)
+      m_first_visible_row = m_selected_row_idx;
+    else if (m_first_visible_row + num_visible_rows <= m_selected_row_idx)
+      m_first_visible_row = m_selected_row_idx - num_visible_rows + 1;
+
+    DisplayRows(window, m_rows, g_options);
+
+    // Get the selected row
+    m_selected_row = GetRowForRowIndex(m_selected_row_idx);
+    // Keep the cursor on the selected row so the highlight and the cursor
+    // are always on the same line
+    if (m_selected_row)
+      window.MoveCursor(m_selected_row->x, m_selected_row->y);
+
+    return true; // Drawing handled
+  }
+
+  KeyHelp *WindowDelegateGetKeyHelp() override {
+    static curses::KeyHelp g_source_view_key_help[] = {
+        {KEY_UP, "Select previous item"},
+        {KEY_DOWN, "Select next item"},
+        {KEY_RIGHT, "Expand selected item"},
+        {KEY_LEFT, "Unexpand selected item or select parent if not expanded"},
+        {KEY_PPAGE, "Page up"},
+        {KEY_NPAGE, "Page down"},
+        {'A', "Format as annotated address"},
+        {'b', "Format as binary"},
+        {'B', "Format as hex bytes with ASCII"},
+        {'c', "Format as character"},
+        {'d', "Format as a signed integer"},
+        {'D', "Format selected value using the default format for the type"},
+        {'f', "Format as float"},
+        {'h', "Show help dialog"},
+        {'i', "Format as instructions"},
+        {'o', "Format as octal"},
+        {'p', "Format as pointer"},
+        {'s', "Format as C string"},
+        {'t', "Toggle showing/hiding type names"},
+        {'u', "Format as an unsigned integer"},
+        {'x', "Format as hex"},
+        {'X', "Format as uppercase hex"},
+        {' ', "Toggle item expansion"},
+        {',', "Page up"},
+        {'.', "Page down"},
+        {'\0', nullptr}};
+    return g_source_view_key_help;
+  }
+
+  HandleCharResult WindowDelegateHandleChar(Window &window, int c) override {
+    switch (c) {
+    case 'x':
+    case 'X':
+    case 'o':
+    case 's':
+    case 'u':
+    case 'd':
+    case 'D':
+    case 'i':
+    case 'A':
+    case 'p':
+    case 'c':
+    case 'b':
+    case 'B':
+    case 'f':
+      // Change the format for the currently selected item
+      if (m_selected_row) {
+        auto valobj_sp = m_selected_row->value.GetSP();
+        if (valobj_sp)
+          valobj_sp->SetFormat(FormatForChar(c));
+      }
+      return eKeyHandled;
+
+    case 't':
+      // Toggle showing type names
+      g_options.show_types = !g_options.show_types;
+      return eKeyHandled;
+
+    case ',':
+    case KEY_PPAGE:
+      // Page up key
+      if (m_first_visible_row > 0) {
+        if (static_cast<int>(m_first_visible_row) > m_max_y)
+          m_first_visible_row -= m_max_y;
+        else
+          m_first_visible_row = 0;
+        m_selected_row_idx = m_first_visible_row;
+      }
+      return eKeyHandled;
+
+    case '.':
+    case KEY_NPAGE:
+      // Page down key
+      if (m_num_rows > static_cast<uint32_t>(m_max_y)) {
+        if (m_first_visible_row + m_max_y < m_num_rows) {
+          m_first_visible_row += m_max_y;
+          m_selected_row_idx = m_first_visible_row;
+        }
+      }
+      return eKeyHandled;
+
+    case KEY_UP:
+      if (m_selected_row_idx > 0)
+        --m_selected_row_idx;
+      return eKeyHandled;
+
+    case KEY_DOWN:
+      if (m_selected_row_idx + 1 < m_num_rows)
+        ++m_selected_row_idx;
+      return eKeyHandled;
+
+    case KEY_RIGHT:
+      if (m_selected_row) {
+        if (!m_selected_row->expanded)
+          m_selected_row->Expand();
+      }
+      return eKeyHandled;
+
+    case KEY_LEFT:
+      if (m_selected_row) {
+        if (m_selected_row->expanded)
+          m_selected_row->Unexpand();
+        else if (m_selected_row->parent)
+          m_selected_row_idx = m_selected_row->parent->row_idx;
+      }
+      return eKeyHandled;
+
+    case ' ':
+      // Toggle expansion state when SPACE is pressed
+      if (m_selected_row) {
+        if (m_selected_row->expanded)
+          m_selected_row->Unexpand();
+        else
+          m_selected_row->Expand();
+      }
+      return eKeyHandled;
+
+    case 'h':
+      window.CreateHelpSubwindow();
+      return eKeyHandled;
+
+    default:
+      break;
+    }
+    return eKeyNotHandled;
+  }
+
+protected:
+  std::vector<Row> m_rows;
+  Row *m_selected_row;
+  uint32_t m_selected_row_idx;
+  uint32_t m_first_visible_row;
+  uint32_t m_num_rows;
+  int m_min_x;
+  int m_min_y;
+  int m_max_x;
+  int m_max_y;
+
+  static Format FormatForChar(int c) {
+    switch (c) {
+    case 'x':
+      return eFormatHex;
+    case 'X':
+      return eFormatHexUppercase;
+    case 'o':
+      return eFormatOctal;
+    case 's':
+      return eFormatCString;
+    case 'u':
+      return eFormatUnsigned;
+    case 'd':
+      return eFormatDecimal;
+    case 'D':
+      return eFormatDefault;
+    case 'i':
+      return eFormatInstruction;
+    case 'A':
+      return eFormatAddressInfo;
+    case 'p':
+      return eFormatPointer;
+    case 'c':
+      return eFormatChar;
+    case 'b':
+      return eFormatBinary;
+    case 'B':
+      return eFormatBytesWithASCII;
+    case 'f':
+      return eFormatFloat;
+    }
+    return eFormatDefault;
+  }
+
+  bool DisplayRowObject(Window &window, Row &row, DisplayOptions &options,
+                        bool highlight, bool last_child) {
+    ValueObject *valobj = row.value.GetSP().get();
+
+    if (valobj == nullptr)
+      return false;
+
+    const char *type_name =
+        options.show_types ? valobj->GetTypeName().GetCString() : nullptr;
+    const char *name = valobj->GetName().GetCString();
+    const char *value = valobj->GetValueAsCString();
+    const char *summary = valobj->GetSummaryAsCString();
+
+    window.MoveCursor(row.x, row.y);
+
+    row.DrawTree(window);
+
+    if (highlight)
+      window.AttributeOn(A_REVERSE);
+
+    if (type_name && type_name[0])
+      window.Printf("(%s) ", type_name);
+
+    if (name && name[0])
+      window.PutCString(name);
+
+    attr_t changed_attr = 0;
+    if (valobj->GetValueDidChange())
+      changed_attr = COLOR_PAIR(5) | A_BOLD;
+
+    if (value && value[0]) {
+      window.PutCString(" = ");
+      if (changed_attr)
+        window.AttributeOn(changed_attr);
+      window.PutCString(value);
+      if (changed_attr)
+        window.AttributeOff(changed_attr);
+    }
+
+    if (summary && summary[0]) {
+      window.PutChar(' ');
+      if (changed_attr)
+        window.AttributeOn(changed_attr);
+      window.PutCString(summary);
+      if (changed_attr)
+        window.AttributeOff(changed_attr);
+    }
+
+    if (highlight)
+      window.AttributeOff(A_REVERSE);
+
+    return true;
+  }
+
+  void DisplayRows(Window &window, std::vector<Row> &rows,
+                   DisplayOptions &options) {
+    // > 0x25B7
+    // \/ 0x25BD
+
+    bool window_is_active = window.IsActive();
+    for (auto &row : rows) {
+      const bool last_child = row.parent && &rows[rows.size() - 1] == &row;
+      // Save the row index in each Row structure
+      row.row_idx = m_num_rows;
+      if ((m_num_rows >= m_first_visible_row) &&
+          ((m_num_rows - m_first_visible_row) <
+           static_cast<uint32_t>(NumVisibleRows()))) {
+        row.x = m_min_x;
+        row.y = m_num_rows - m_first_visible_row + 1;
+        if (DisplayRowObject(window, row, options,
+                             window_is_active &&
+                                 m_num_rows == m_selected_row_idx,
+                             last_child)) {
+          ++m_num_rows;
+        } else {
+          row.x = 0;
+          row.y = 0;
+        }
+      } else {
+        row.x = 0;
+        row.y = 0;
+        ++m_num_rows;
+      }
+
+      auto &children = row.GetChildren();
+      if (row.expanded && !children.empty()) {
+        DisplayRows(window, children, options);
+      }
+    }
+  }
+
+  int CalculateTotalNumberRows(std::vector<Row> &rows) {
+    int row_count = 0;
+    for (auto &row : rows) {
+      ++row_count;
+      if (row.expanded)
+        row_count += CalculateTotalNumberRows(row.GetChildren());
+    }
+    return row_count;
+  }
+
+  static Row *GetRowForRowIndexImpl(std::vector<Row> &rows,
+                                    size_t &row_index) {
+    for (auto &row : rows) {
+      if (row_index == 0)
+        return &row;
+      else {
+        --row_index;
+        auto &children = row.GetChildren();
+        if (row.expanded && !children.empty()) {
+          Row *result = GetRowForRowIndexImpl(children, row_index);
+          if (result)
+            return result;
+        }
+      }
+    }
+    return nullptr;
+  }
+
+  Row *GetRowForRowIndex(size_t row_index) {
+    return GetRowForRowIndexImpl(m_rows, row_index);
+  }
+
+  int NumVisibleRows() const { return m_max_y - m_min_y; }
+
+  static DisplayOptions g_options;
+};
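+
+// Displays the locals of the currently selected frame. The value list is
+// only rebuilt when the frame's lexical block changes, so redraws while
+// stopped at the same location reuse the existing rows.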
+class FrameVariablesWindowDelegate : public ValueObjectListDelegate {
+public:
+  FrameVariablesWindowDelegate(Debugger &debugger)
+      : ValueObjectListDelegate(), m_debugger(debugger),
+        m_frame_block(nullptr) {}
+
+  ~FrameVariablesWindowDelegate() override = default;
+
+  const char *WindowDelegateGetHelpText() override {
+    return "Frame variable window keyboard shortcuts:";
+  }
+
+  bool WindowDelegateDraw(Window &window, bool force) override {
+    ExecutionContext exe_ctx(
+        m_debugger.GetCommandInterpreter().GetExecutionContext());
+    Process *process = exe_ctx.GetProcessPtr();
+    Block *frame_block = nullptr;
+    StackFrame *frame = nullptr;
+
+    if (process) {
+      StateType state = process->GetState();
+      if (StateIsStoppedState(state, true)) {
+        frame = exe_ctx.GetFramePtr();
+        if (frame)
+          frame_block = frame->GetFrameBlock();
+      } else if (StateIsRunningState(state)) {
+        return true; // Don't do any updating when we are running
+      }
+    }
+
+    ValueObjectList local_values;
+    if (frame_block) {
+      // Only update the variables if they have changed
+      if (m_frame_block != frame_block) {
+        m_frame_block = frame_block;
+
+        VariableList *locals = frame->GetVariableList(true);
+        if (locals) {
+          const DynamicValueType use_dynamic = eDynamicDontRunTarget;
+          for (const VariableSP &local_sp : *locals) {
+            ValueObjectSP value_sp =
+                frame->GetValueObjectForFrameVariable(local_sp, use_dynamic);
+            if (value_sp) {
+              ValueObjectSP synthetic_value_sp = value_sp->GetSyntheticValue();
+              if (synthetic_value_sp)
+                local_values.Append(synthetic_value_sp);
+              else
+                local_values.Append(value_sp);
+            }
+          }
+          // Update the values
+          SetValues(local_values);
+        }
+      }
+    } else {
+      m_frame_block = nullptr;
+      // Update the values with an empty list if there is no frame
+      SetValues(local_values);
+    }
+
+    return ValueObjectListDelegate::WindowDelegateDraw(window, force);
+  }
+
+protected:
+  Debugger &m_debugger;
+  Block *m_frame_block;
+};
+
+class RegistersWindowDelegate : public ValueObjectListDelegate {
+public:
+  RegistersWindowDelegate(Debugger &debugger)
+      : ValueObjectListDelegate(), m_debugger(debugger) {}
+
+  ~RegistersWindowDelegate() override = default;
+
+  const char *WindowDelegateGetHelpText() override {
+    return "Register window keyboard shortcuts:";
+  }
+
+  bool WindowDelegateDraw(Window &window, bool force) override {
+    ExecutionContext exe_ctx(
+        m_debugger.GetCommandInterpreter().GetExecutionContext());
+    StackFrame *frame = exe_ctx.GetFramePtr();
+
+    ValueObjectList value_list;
+    if (frame) {
+      if (frame->GetStackID() != m_stack_id) {
+        m_stack_id = frame->GetStackID();
+        RegisterContextSP reg_ctx(frame->GetRegisterContext());
+        if (reg_ctx) {
+          const uint32_t num_sets = reg_ctx->GetRegisterSetCount();
+          for (uint32_t set_idx = 0; set_idx < num_sets; ++set_idx) {
+            value_list.Append(
+                ValueObjectRegisterSet::Create(frame, reg_ctx, set_idx));
+          }
+        }
+        SetValues(value_list);
+      }
+    } else {
+      Process *process = exe_ctx.GetProcessPtr();
+      if (process && process->IsAlive())
+        return true; // Don't do any updating if we are running
+      else {
+        // Update the values with an empty list if there is no process or
+        // the process isn't alive anymore
+        SetValues(value_list);
+      }
+    }
+    return ValueObjectListDelegate::WindowDelegateDraw(window, force);
+  }
+
+protected:
+  Debugger &m_debugger;
+  StackID m_stack_id;
+};
+
+static const char *CursesKeyToCString(int ch) {
+  static char g_desc[32];
+  if (ch >= KEY_F0 && ch < KEY_F0 + 64) {
+    snprintf(g_desc, sizeof(g_desc), "F%u", ch - KEY_F0);
+    return g_desc;
+  }
+  switch (ch) {
+  case KEY_DOWN:
+    return "down";
+  case KEY_UP:
+    return "up";
+  case KEY_LEFT:
+    return "left";
+  case KEY_RIGHT:
+    return "right";
+  case KEY_HOME:
+    return "home";
+  case KEY_BACKSPACE:
+    return "backspace";
+  case KEY_DL:
+    return "delete-line";
+  case KEY_IL:
+    return "insert-line";
+  case KEY_DC:
+    return "delete-char";
+  case KEY_IC:
+    return "insert-char";
+  case KEY_CLEAR:
+    return "clear";
+  case KEY_EOS:
+    return "clear-to-eos";
+  case KEY_EOL:
+    return "clear-to-eol";
+  case KEY_SF:
+    return "scroll-forward";
+  case KEY_SR:
+    return "scroll-backward";
+  case KEY_NPAGE:
+    return "page-down";
+  case KEY_PPAGE:
+    return "page-up";
+  case KEY_STAB:
+    return "set-tab";
+  case KEY_CTAB:
+    return "clear-tab";
+  case KEY_CATAB:
+    return "clear-all-tabs";
+  case KEY_ENTER:
+    return "enter";
+  case KEY_PRINT:
+    return "print";
+  case KEY_LL:
+    return "lower-left key";
+  case KEY_A1:
+    return "upper left of keypad";
+  case KEY_A3:
+    return "upper right of keypad";
+  case KEY_B2:
+    return "center of keypad";
+  case KEY_C1:
+    return "lower left of keypad";
+  case KEY_C3:
+    return "lower right of keypad";
+  case KEY_BTAB:
+    return "back-tab key";
+  case KEY_BEG:
+    return "begin key";
+  case KEY_CANCEL:
+    return "cancel key";
+  case KEY_CLOSE:
+    return "close key";
+  case KEY_COMMAND:
+    return "command key";
+  case KEY_COPY:
+    return "copy key";
+  case KEY_CREATE:
+    return "create key";
+  case KEY_END:
+    return "end key";
+  case KEY_EXIT:
+    return "exit key";
+  case KEY_FIND:
+    return "find key";
+  case KEY_HELP:
+    return "help key";
+  case KEY_MARK:
+    return "mark key";
+  case KEY_MESSAGE:
+    return "message key";
+  case KEY_MOVE:
+    return "move key";
+  case KEY_NEXT:
+    return "next key";
+  case KEY_OPEN:
+    return "open key";
+  case KEY_OPTIONS:
+    return "options key";
+  case KEY_PREVIOUS:
+    return "previous key";
+  case KEY_REDO:
+    return "redo key";
+  case KEY_REFERENCE:
+    return "reference key";
+  case KEY_REFRESH:
+    return "refresh key";
+  case KEY_REPLACE:
+    return "replace key";
+  case KEY_RESTART:
+    return "restart key";
+  case KEY_RESUME:
+    return "resume key";
+  case KEY_SAVE:
+    return "save key";
+  case KEY_SBEG:
+    return "shifted begin key";
+  case KEY_SCANCEL:
+    return "shifted cancel key";
+  case KEY_SCOMMAND:
+    return "shifted command key";
+  case KEY_SCOPY:
+    return "shifted copy key";
+  case KEY_SCREATE:
+    return "shifted create key";
+  case KEY_SDC:
+    return "shifted delete-character key";
+  case KEY_SDL:
+    return "shifted delete-line key";
+  case KEY_SELECT:
+    return "select key";
+  case KEY_SEND:
+    return "shifted end key";
+  case KEY_SEOL:
+    return "shifted clear-to-end-of-line key";
+  case KEY_SEXIT:
+    return "shifted exit key";
+  case KEY_SFIND:
+    return "shifted find key";
+  case KEY_SHELP:
+    return "shifted help key";
+  case KEY_SHOME:
+    return "shifted home key";
+  case KEY_SIC:
+    return "shifted insert-character key";
+  case KEY_SLEFT:
+    return "shifted left-arrow key";
+  case KEY_SMESSAGE:
+    return "shifted message key";
+  case KEY_SMOVE:
+    return "shifted move key";
+  case KEY_SNEXT:
+    return "shifted next key";
+  case KEY_SOPTIONS:
+    return "shifted options key";
+  case KEY_SPREVIOUS:
+    return "shifted previous key";
+  case KEY_SPRINT:
+    return "shifted print key";
+  case KEY_SREDO:
+    return "shifted redo key";
+  case KEY_SREPLACE:
+    return "shifted replace key";
+  case KEY_SRIGHT:
+    return "shifted right-arrow key";
+  case KEY_SRSUME:
+    return "shifted resume key";
+  case KEY_SSAVE:
+    return "shifted save key";
+  case KEY_SSUSPEND:
+    return "shifted suspend key";
+  case KEY_SUNDO:
+    return "shifted undo key";
+  case KEY_SUSPEND:
+    return "suspend key";
+  case KEY_UNDO:
+    return "undo key";
+  case KEY_MOUSE:
+    return "Mouse event has occurred";
+  case KEY_RESIZE:
+    return "Terminal resize event";
+#ifdef KEY_EVENT
+  case KEY_EVENT:
+    return "We were interrupted by an event";
+#endif
+  case KEY_RETURN:
+    return "return";
+  case ' ':
+    return "space";
+  case '\t':
+    return "tab";
+  case KEY_ESCAPE:
+    return "escape";
+  default:
+    if (isprint(ch))
+      snprintf(g_desc, sizeof(g_desc), "%c", ch);
+    else
+      snprintf(g_desc, sizeof(g_desc), "\\x%2.2x", ch);
+    return g_desc;
+  }
+  return nullptr;
+}
m_text.AppendString(""); + } + if (key_help_array) { + for (KeyHelp *key = key_help_array; key->ch; ++key) { + StreamString key_description; + key_description.Printf("%10s - %s", CursesKeyToCString(key->ch), + key->description); + m_text.AppendString(key_description.GetString()); + } + } +} + +HelpDialogDelegate::~HelpDialogDelegate() = default; + +bool HelpDialogDelegate::WindowDelegateDraw(Window &window, bool force) { + window.Erase(); + const int window_height = window.GetHeight(); + int x = 2; + int y = 1; + const int min_y = y; + const int max_y = window_height - 1 - y; + const size_t num_visible_lines = max_y - min_y + 1; + const size_t num_lines = m_text.GetSize(); + const char *bottom_message; + if (num_lines <= num_visible_lines) + bottom_message = "Press any key to exit"; + else + bottom_message = "Use arrows to scroll, any other key to exit"; + window.DrawTitleBox(window.GetName(), bottom_message); + while (y <= max_y) { + window.MoveCursor(x, y); + window.PutCStringTruncated( + m_text.GetStringAtIndex(m_first_visible_line + y - min_y), 1); + ++y; + } + return true; +} + +HandleCharResult HelpDialogDelegate::WindowDelegateHandleChar(Window &window, + int key) { + bool done = false; + const size_t num_lines = m_text.GetSize(); + const size_t num_visible_lines = window.GetHeight() - 2; + + if (num_lines <= num_visible_lines) { + done = true; + // If we have all lines visible and don't need scrolling, then any key + // press will cause us to exit + } else { + switch (key) { + case KEY_UP: + if (m_first_visible_line > 0) + --m_first_visible_line; + break; + + case KEY_DOWN: + if (m_first_visible_line + num_visible_lines < num_lines) + ++m_first_visible_line; + break; + + case KEY_PPAGE: + case ',': + if (m_first_visible_line > 0) { + if (static_cast(m_first_visible_line) >= num_visible_lines) + m_first_visible_line -= num_visible_lines; + else + m_first_visible_line = 0; + } + break; + + case KEY_NPAGE: + case '.': + if (m_first_visible_line + num_visible_lines < num_lines) { + m_first_visible_line += num_visible_lines; + if (static_cast(m_first_visible_line) > num_lines) + m_first_visible_line = num_lines - num_visible_lines; + } + break; + + default: + done = true; + break; + } + } + if (done) + window.GetParent()->RemoveSubWindow(&window); + return eKeyHandled; +} + +class ApplicationDelegate : public WindowDelegate, public MenuDelegate { +public: + enum { + eMenuID_LLDB = 1, + eMenuID_LLDBAbout, + eMenuID_LLDBExit, + + eMenuID_Target, + eMenuID_TargetCreate, + eMenuID_TargetDelete, + + eMenuID_Process, + eMenuID_ProcessAttach, + eMenuID_ProcessDetach, + eMenuID_ProcessLaunch, + eMenuID_ProcessContinue, + eMenuID_ProcessHalt, + eMenuID_ProcessKill, + + eMenuID_Thread, + eMenuID_ThreadStepIn, + eMenuID_ThreadStepOver, + eMenuID_ThreadStepOut, + + eMenuID_View, + eMenuID_ViewBacktrace, + eMenuID_ViewRegisters, + eMenuID_ViewSource, + eMenuID_ViewVariables, + + eMenuID_Help, + eMenuID_HelpGUIHelp + }; + + ApplicationDelegate(Application &app, Debugger &debugger) + : WindowDelegate(), MenuDelegate(), m_app(app), m_debugger(debugger) {} + + ~ApplicationDelegate() override = default; + + bool WindowDelegateDraw(Window &window, bool force) override { + return false; // Drawing not handled, let standard window drawing happen + } + + HandleCharResult WindowDelegateHandleChar(Window &window, int key) override { + switch (key) { + case '\t': + window.SelectNextWindowAsActive(); + return eKeyHandled; + + case 'h': + window.CreateHelpSubwindow(); + return eKeyHandled; + + case KEY_ESCAPE: + 
return eQuitApplication; + + default: + break; + } + return eKeyNotHandled; + } + + const char *WindowDelegateGetHelpText() override { + return "Welcome to the LLDB curses GUI.\n\n" + "Press the TAB key to change the selected view.\n" + "Each view has its own keyboard shortcuts, press 'h' to open a " + "dialog to display them.\n\n" + "Common key bindings for all views:"; + } + + KeyHelp *WindowDelegateGetKeyHelp() override { + static curses::KeyHelp g_source_view_key_help[] = { + {'\t', "Select next view"}, + {'h', "Show help dialog with view specific key bindings"}, + {',', "Page up"}, + {'.', "Page down"}, + {KEY_UP, "Select previous"}, + {KEY_DOWN, "Select next"}, + {KEY_LEFT, "Unexpand or select parent"}, + {KEY_RIGHT, "Expand"}, + {KEY_PPAGE, "Page up"}, + {KEY_NPAGE, "Page down"}, + {'\0', nullptr}}; + return g_source_view_key_help; + } + + MenuActionResult MenuDelegateAction(Menu &menu) override { + switch (menu.GetIdentifier()) { + case eMenuID_ThreadStepIn: { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasThreadScope()) { + Process *process = exe_ctx.GetProcessPtr(); + if (process && process->IsAlive() && + StateIsStoppedState(process->GetState(), true)) + exe_ctx.GetThreadRef().StepIn(true); + } + } + return MenuActionResult::Handled; + + case eMenuID_ThreadStepOut: { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasThreadScope()) { + Process *process = exe_ctx.GetProcessPtr(); + if (process && process->IsAlive() && + StateIsStoppedState(process->GetState(), true)) + exe_ctx.GetThreadRef().StepOut(); + } + } + return MenuActionResult::Handled; + + case eMenuID_ThreadStepOver: { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasThreadScope()) { + Process *process = exe_ctx.GetProcessPtr(); + if (process && process->IsAlive() && + StateIsStoppedState(process->GetState(), true)) + exe_ctx.GetThreadRef().StepOver(true); + } + } + return MenuActionResult::Handled; + + case eMenuID_ProcessContinue: { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasProcessScope()) { + Process *process = exe_ctx.GetProcessPtr(); + if (process && process->IsAlive() && + StateIsStoppedState(process->GetState(), true)) + process->Resume(); + } + } + return MenuActionResult::Handled; + + case eMenuID_ProcessKill: { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasProcessScope()) { + Process *process = exe_ctx.GetProcessPtr(); + if (process && process->IsAlive()) + process->Destroy(false); + } + } + return MenuActionResult::Handled; + + case eMenuID_ProcessHalt: { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasProcessScope()) { + Process *process = exe_ctx.GetProcessPtr(); + if (process && process->IsAlive()) + process->Halt(); + } + } + return MenuActionResult::Handled; + + case eMenuID_ProcessDetach: { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasProcessScope()) { + Process *process = exe_ctx.GetProcessPtr(); + if (process && process->IsAlive()) + process->Detach(false); + } + } + return MenuActionResult::Handled; + + case eMenuID_Process: { + // Populate the menu with all of the threads if the process is stopped + // when the Process menu gets selected and is about to display its + // submenu. 
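+      // The first seven submenus are the fixed Process menu items
+      // (presumably built along with the menubar, outside this listing);
+      // index 7 holds the separator and indexes 8 and up hold the dynamic
+      // per-thread items, which is why the code below erases from index 8
+      // before re-adding one item per thread.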
+      Menus &submenus = menu.GetSubmenus();
+      ExecutionContext exe_ctx =
+          m_debugger.GetCommandInterpreter().GetExecutionContext();
+      Process *process = exe_ctx.GetProcessPtr();
+      if (process && process->IsAlive() &&
+          StateIsStoppedState(process->GetState(), true)) {
+        if (submenus.size() == 7)
+          menu.AddSubmenu(MenuSP(new Menu(Menu::Type::Separator)));
+        else if (submenus.size() > 8)
+          submenus.erase(submenus.begin() + 8, submenus.end());
+
+        ThreadList &threads = process->GetThreadList();
+        std::lock_guard<std::recursive_mutex> guard(threads.GetMutex());
+        size_t num_threads = threads.GetSize();
+        for (size_t i = 0; i < num_threads; ++i) {
+          ThreadSP thread_sp = threads.GetThreadAtIndex(i);
+          char menu_char = '\0';
+          if (i < 9)
+            menu_char = '1' + i;
+          StreamString thread_menu_title;
+          thread_menu_title.Printf("Thread %u", thread_sp->GetIndexID());
+          const char *thread_name = thread_sp->GetName();
+          if (thread_name && thread_name[0])
+            thread_menu_title.Printf(" %s", thread_name);
+          else {
+            const char *queue_name = thread_sp->GetQueueName();
+            if (queue_name && queue_name[0])
+              thread_menu_title.Printf(" %s", queue_name);
+          }
+          menu.AddSubmenu(
+              MenuSP(new Menu(thread_menu_title.GetString().str().c_str(),
+                              nullptr, menu_char, thread_sp->GetID())));
+        }
+      } else if (submenus.size() > 7) {
+        // Remove the separator and any other thread submenu items that were
+        // previously added
+        submenus.erase(submenus.begin() + 7, submenus.end());
+      }
+      // Since we are adding and removing items we need to recalculate the
+      // name lengths
+      menu.RecalculateNameLengths();
+    }
+      return MenuActionResult::Handled;
+
+    case eMenuID_ViewVariables: {
+      WindowSP main_window_sp = m_app.GetMainWindow();
+      WindowSP source_window_sp = main_window_sp->FindSubWindow("Source");
+      WindowSP variables_window_sp = main_window_sp->FindSubWindow("Variables");
+      WindowSP registers_window_sp = main_window_sp->FindSubWindow("Registers");
+      const Rect source_bounds = source_window_sp->GetBounds();
+
+      if (variables_window_sp) {
+        const Rect variables_bounds = variables_window_sp->GetBounds();
+
+        main_window_sp->RemoveSubWindow(variables_window_sp.get());
+
+        if (registers_window_sp) {
+          // We have a registers window, so give all the area back to the
+          // registers window
+          Rect registers_bounds = variables_bounds;
+          registers_bounds.size.width = source_bounds.size.width;
+          registers_window_sp->SetBounds(registers_bounds);
+        } else {
+          // We have no registers window showing so give the bottom area
+          // back to the source view
+          source_window_sp->Resize(source_bounds.size.width,
+                                   source_bounds.size.height +
+                                       variables_bounds.size.height);
+        }
+      } else {
+        Rect new_variables_rect;
+        if (registers_window_sp) {
+          // We have a registers window so split the area of the registers
+          // window into two columns where the left hand side will be the
+          // variables and the right hand side will be the registers
+          const Rect variables_bounds = registers_window_sp->GetBounds();
+          Rect new_registers_rect;
+          variables_bounds.VerticalSplitPercentage(0.50, new_variables_rect,
+                                                   new_registers_rect);
+          registers_window_sp->SetBounds(new_registers_rect);
+        } else {
+          // No variables window, grab the bottom part of the source window
+          Rect new_source_rect;
+          source_bounds.HorizontalSplitPercentage(0.70, new_source_rect,
+                                                  new_variables_rect);
+          source_window_sp->SetBounds(new_source_rect);
+        }
+        WindowSP new_window_sp = main_window_sp->CreateSubWindow(
+            "Variables", new_variables_rect, false);
+        new_window_sp->SetDelegate(
+            WindowDelegateSP(new FrameVariablesWindowDelegate(m_debugger)));
+      }
+      touchwin(stdscr);
+    }
+      return MenuActionResult::Handled;
+
+    case eMenuID_ViewRegisters: {
+      WindowSP main_window_sp = m_app.GetMainWindow();
+      WindowSP source_window_sp = main_window_sp->FindSubWindow("Source");
+      WindowSP variables_window_sp = main_window_sp->FindSubWindow("Variables");
+      WindowSP registers_window_sp = main_window_sp->FindSubWindow("Registers");
+      const Rect source_bounds = source_window_sp->GetBounds();
+
+      if (registers_window_sp) {
+        if (variables_window_sp) {
+          const Rect variables_bounds = variables_window_sp->GetBounds();
+
+          // We have a variables window, so give all the area back to the
+          // variables window
+          variables_window_sp->Resize(variables_bounds.size.width +
+                                          registers_window_sp->GetWidth(),
+                                      variables_bounds.size.height);
+        } else {
+          // We have no variables window showing so give the bottom area
+          // back to the source view
+          source_window_sp->Resize(source_bounds.size.width,
+                                   source_bounds.size.height +
+                                       registers_window_sp->GetHeight());
+        }
+        main_window_sp->RemoveSubWindow(registers_window_sp.get());
+      } else {
+        Rect new_regs_rect;
+        if (variables_window_sp) {
+          // We have a variables window, split it into two columns where the
+          // left hand side will be the variables and the right hand side
+          // will be the registers
+          const Rect variables_bounds = variables_window_sp->GetBounds();
+          Rect new_vars_rect;
+          variables_bounds.VerticalSplitPercentage(0.50, new_vars_rect,
+                                                   new_regs_rect);
+          variables_window_sp->SetBounds(new_vars_rect);
+        } else {
+          // No registers window, grab the bottom part of the source window
+          Rect new_source_rect;
+          source_bounds.HorizontalSplitPercentage(0.70, new_source_rect,
+                                                  new_regs_rect);
+          source_window_sp->SetBounds(new_source_rect);
+        }
+        WindowSP new_window_sp =
+            main_window_sp->CreateSubWindow("Registers", new_regs_rect, false);
+        new_window_sp->SetDelegate(
+            WindowDelegateSP(new RegistersWindowDelegate(m_debugger)));
+      }
+      touchwin(stdscr);
+    }
+      return MenuActionResult::Handled;
+
+    case eMenuID_HelpGUIHelp:
+      m_app.GetMainWindow()->CreateHelpSubwindow();
+      return MenuActionResult::Handled;
+
+    default:
+      break;
+    }
+
+    return MenuActionResult::NotHandled;
+  }
+
+protected:
+  Application &m_app;
+  Debugger &m_debugger;
+};
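+
+// The status bar draws at fixed columns: process ID and state at column 0,
+// the formatted thread info at column 40, and the frame index/PC at column
+// 60 (the latter two only while the process is stopped).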
(state == eStateExited) { + const char *exit_desc = process->GetExitDescription(); + const int exit_status = process->GetExitStatus(); + if (exit_desc && exit_desc[0]) + window.Printf(" with status = %i (%s)", exit_status, exit_desc); + else + window.Printf(" with status = %i", exit_status); + } + } + return true; + } + +protected: + Debugger &m_debugger; + FormatEntity::Entry m_format; +}; + +class SourceFileWindowDelegate : public WindowDelegate { +public: + SourceFileWindowDelegate(Debugger &debugger) + : WindowDelegate(), m_debugger(debugger), m_sc(), m_file_sp(), + m_disassembly_scope(nullptr), m_disassembly_sp(), m_disassembly_range(), + m_title(), m_line_width(4), m_selected_line(0), m_pc_line(0), + m_stop_id(0), m_frame_idx(UINT32_MAX), m_first_visible_line(0), + m_min_x(0), m_min_y(0), m_max_x(0), m_max_y(0) {} + + ~SourceFileWindowDelegate() override = default; + + void Update(const SymbolContext &sc) { m_sc = sc; } + + uint32_t NumVisibleLines() const { return m_max_y - m_min_y; } + + const char *WindowDelegateGetHelpText() override { + return "Source/Disassembly window keyboard shortcuts:"; + } + + KeyHelp *WindowDelegateGetKeyHelp() override { + static curses::KeyHelp g_source_view_key_help[] = { + {KEY_RETURN, "Run to selected line with one shot breakpoint"}, + {KEY_UP, "Select previous source line"}, + {KEY_DOWN, "Select next source line"}, + {KEY_PPAGE, "Page up"}, + {KEY_NPAGE, "Page down"}, + {'b', "Set breakpoint on selected source/disassembly line"}, + {'c', "Continue process"}, + {'d', "Detach and resume process"}, + {'D', "Detach with process suspended"}, + {'h', "Show help dialog"}, + {'k', "Kill process"}, + {'n', "Step over (source line)"}, + {'N', "Step over (single instruction)"}, + {'o', "Step out"}, + {'s', "Step in (source line)"}, + {'S', "Step in (single instruction)"}, + {',', "Page up"}, + {'.', "Page down"}, + {'\0', nullptr}}; + return g_source_view_key_help; + } + + bool WindowDelegateDraw(Window &window, bool force) override { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + Process *process = exe_ctx.GetProcessPtr(); + Thread *thread = nullptr; + + bool update_location = false; + if (process) { + StateType state = process->GetState(); + if (StateIsStoppedState(state, true)) { + // We are stopped, so it is ok to update the location + update_location = true; + } + } + + m_min_x = 1; + m_min_y = 2; + m_max_x = window.GetMaxX() - 1; + m_max_y = window.GetMaxY() - 1; + + const uint32_t num_visible_lines = NumVisibleLines(); + StackFrameSP frame_sp; + bool set_selected_line_to_pc = false; + + if (update_location) { + const bool process_alive = process ? process->IsAlive() : false; + bool thread_changed = false; + if (process_alive) { + thread = exe_ctx.GetThreadPtr(); + if (thread) { + frame_sp = thread->GetSelectedFrame(); + auto tid = thread->GetID(); + thread_changed = tid != m_tid; + m_tid = tid; + } else { + if (m_tid != LLDB_INVALID_THREAD_ID) { + thread_changed = true; + m_tid = LLDB_INVALID_THREAD_ID; + } + } + } + const uint32_t stop_id = process ?
process->GetStopID() : 0; + const bool stop_id_changed = stop_id != m_stop_id; + bool frame_changed = false; + m_stop_id = stop_id; + m_title.Clear(); + if (frame_sp) { + m_sc = frame_sp->GetSymbolContext(eSymbolContextEverything); + if (m_sc.module_sp) { + m_title.Printf( + "%s", m_sc.module_sp->GetFileSpec().GetFilename().GetCString()); + ConstString func_name = m_sc.GetFunctionName(); + if (func_name) + m_title.Printf("`%s", func_name.GetCString()); + } + const uint32_t frame_idx = frame_sp->GetFrameIndex(); + frame_changed = frame_idx != m_frame_idx; + m_frame_idx = frame_idx; + } else { + m_sc.Clear(true); + frame_changed = m_frame_idx != UINT32_MAX; + m_frame_idx = UINT32_MAX; + } + + const bool context_changed = + thread_changed || frame_changed || stop_id_changed; + + if (process_alive) { + if (m_sc.line_entry.IsValid()) { + m_pc_line = m_sc.line_entry.line; + if (m_pc_line != UINT32_MAX) + --m_pc_line; // Convert to zero based line number... + // Update the selected line if the stop ID changed... + if (context_changed) + m_selected_line = m_pc_line; + + if (m_file_sp && m_file_sp->GetFileSpec() == m_sc.line_entry.file) { + // Same file, nothing to do, we should either have the lines or not + // (source file missing) + if (m_selected_line >= static_cast<size_t>(m_first_visible_line)) { + if (m_selected_line >= m_first_visible_line + num_visible_lines) + m_first_visible_line = m_selected_line - 10; + } else { + if (m_selected_line > 10) + m_first_visible_line = m_selected_line - 10; + else + m_first_visible_line = 0; + } + } else { + // File changed, set selected line to the line with the PC + m_selected_line = m_pc_line; + m_file_sp = + m_debugger.GetSourceManager().GetFile(m_sc.line_entry.file); + if (m_file_sp) { + const size_t num_lines = m_file_sp->GetNumLines(); + m_line_width = 1; + for (size_t n = num_lines; n >= 10; n = n / 10) + ++m_line_width; + + if (num_lines < num_visible_lines || + m_selected_line < num_visible_lines) + m_first_visible_line = 0; + else + m_first_visible_line = m_selected_line - 10; + } + } + } else { + m_file_sp.reset(); + } + + if (!m_file_sp || m_file_sp->GetNumLines() == 0) { + // Show disassembly + bool prefer_file_cache = false; + if (m_sc.function) { + if (m_disassembly_scope != m_sc.function) { + m_disassembly_scope = m_sc.function; + m_disassembly_sp = m_sc.function->GetInstructions( + exe_ctx, nullptr, prefer_file_cache); + if (m_disassembly_sp) { + set_selected_line_to_pc = true; + m_disassembly_range = m_sc.function->GetAddressRange(); + } else { + m_disassembly_range.Clear(); + } + } else { + set_selected_line_to_pc = context_changed; + } + } else if (m_sc.symbol) { + if (m_disassembly_scope != m_sc.symbol) { + m_disassembly_scope = m_sc.symbol; + m_disassembly_sp = m_sc.symbol->GetInstructions( + exe_ctx, nullptr, prefer_file_cache); + if (m_disassembly_sp) { + set_selected_line_to_pc = true; + m_disassembly_range.GetBaseAddress() = + m_sc.symbol->GetAddress(); + m_disassembly_range.SetByteSize(m_sc.symbol->GetByteSize()); + } else { + m_disassembly_range.Clear(); + } + } else { + set_selected_line_to_pc = context_changed; + } + } + } + } else { + m_pc_line = UINT32_MAX; + } + } + + const int window_width = window.GetWidth(); + window.Erase(); + window.DrawTitleBox("Sources"); + if (!m_title.GetString().empty()) { + window.AttributeOn(A_REVERSE); + window.MoveCursor(1, 1); + window.PutChar(' '); + window.PutCStringTruncated(m_title.GetString().str().c_str(), 1); + int x = window.GetCursorX(); + if (x < window_width - 1) { + window.Printf("%*s",
window_width - x - 1, ""); + } + window.AttributeOff(A_REVERSE); + } + + Target *target = exe_ctx.GetTargetPtr(); + const size_t num_source_lines = GetNumSourceLines(); + if (num_source_lines > 0) { + // Display source + BreakpointLines bp_lines; + if (target) { + BreakpointList &bp_list = target->GetBreakpointList(); + const size_t num_bps = bp_list.GetSize(); + for (size_t bp_idx = 0; bp_idx < num_bps; ++bp_idx) { + BreakpointSP bp_sp = bp_list.GetBreakpointAtIndex(bp_idx); + const size_t num_bps_locs = bp_sp->GetNumLocations(); + for (size_t bp_loc_idx = 0; bp_loc_idx < num_bps_locs; ++bp_loc_idx) { + BreakpointLocationSP bp_loc_sp = + bp_sp->GetLocationAtIndex(bp_loc_idx); + LineEntry bp_loc_line_entry; + if (bp_loc_sp->GetAddress().CalculateSymbolContextLineEntry( + bp_loc_line_entry)) { + if (m_file_sp->GetFileSpec() == bp_loc_line_entry.file) { + bp_lines.insert(bp_loc_line_entry.line); + } + } + } + } + } + + const attr_t selected_highlight_attr = A_REVERSE; + const attr_t pc_highlight_attr = COLOR_PAIR(1); + + for (size_t i = 0; i < num_visible_lines; ++i) { + const uint32_t curr_line = m_first_visible_line + i; + if (curr_line < num_source_lines) { + const int line_y = m_min_y + i; + window.MoveCursor(1, line_y); + const bool is_pc_line = curr_line == m_pc_line; + const bool line_is_selected = m_selected_line == curr_line; + // Highlight the line as the PC line first, then if the selected line + // isn't the same as the PC line, highlight it differently + attr_t highlight_attr = 0; + attr_t bp_attr = 0; + if (is_pc_line) + highlight_attr = pc_highlight_attr; + else if (line_is_selected) + highlight_attr = selected_highlight_attr; + + if (bp_lines.find(curr_line + 1) != bp_lines.end()) + bp_attr = COLOR_PAIR(2); + + if (bp_attr) + window.AttributeOn(bp_attr); + + window.Printf(" %*u ", m_line_width, curr_line + 1); + + if (bp_attr) + window.AttributeOff(bp_attr); + + window.PutChar(ACS_VLINE); + // Mark the line with the PC with a diamond + if (is_pc_line) + window.PutChar(ACS_DIAMOND); + else + window.PutChar(' '); + + if (highlight_attr) + window.AttributeOn(highlight_attr); + const uint32_t line_len = + m_file_sp->GetLineLength(curr_line + 1, false); + if (line_len > 0) + window.PutCString(m_file_sp->PeekLineData(curr_line + 1), line_len); + + if (is_pc_line && frame_sp && + frame_sp->GetConcreteFrameIndex() == 0) { + StopInfoSP stop_info_sp; + if (thread) + stop_info_sp = thread->GetStopInfo(); + if (stop_info_sp) { + const char *stop_description = stop_info_sp->GetDescription(); + if (stop_description && stop_description[0]) { + size_t stop_description_len = strlen(stop_description); + int desc_x = window_width - stop_description_len - 16; + window.Printf("%*s", desc_x - window.GetCursorX(), ""); + // window.MoveCursor(window_width - stop_description_len - 15, + // line_y); + window.Printf("<<< Thread %u: %s ", thread->GetIndexID(), + stop_description); + } + } else { + window.Printf("%*s", window_width - window.GetCursorX() - 1, ""); + } + } + if (highlight_attr) + window.AttributeOff(highlight_attr); + } else { + break; + } + } + } else { + size_t num_disassembly_lines = GetNumDisassemblyLines(); + if (num_disassembly_lines > 0) { + // Display disassembly + BreakpointAddrs bp_file_addrs; + Target *target = exe_ctx.GetTargetPtr(); + if (target) { + BreakpointList &bp_list = target->GetBreakpointList(); + const size_t num_bps = bp_list.GetSize(); + for (size_t bp_idx = 0; bp_idx < num_bps; ++bp_idx) { + BreakpointSP bp_sp = bp_list.GetBreakpointAtIndex(bp_idx); + const size_t 
num_bps_locs = bp_sp->GetNumLocations(); + for (size_t bp_loc_idx = 0; bp_loc_idx < num_bps_locs; + ++bp_loc_idx) { + BreakpointLocationSP bp_loc_sp = + bp_sp->GetLocationAtIndex(bp_loc_idx); + LineEntry bp_loc_line_entry; + const lldb::addr_t file_addr = + bp_loc_sp->GetAddress().GetFileAddress(); + if (file_addr != LLDB_INVALID_ADDRESS) { + if (m_disassembly_range.ContainsFileAddress(file_addr)) + bp_file_addrs.insert(file_addr); + } + } + } + } + + const attr_t selected_highlight_attr = A_REVERSE; + const attr_t pc_highlight_attr = COLOR_PAIR(1); + + StreamString strm; + + InstructionList &insts = m_disassembly_sp->GetInstructionList(); + Address pc_address; + + if (frame_sp) + pc_address = frame_sp->GetFrameCodeAddress(); + const uint32_t pc_idx = + pc_address.IsValid() + ? insts.GetIndexOfInstructionAtAddress(pc_address) + : UINT32_MAX; + if (set_selected_line_to_pc) { + m_selected_line = pc_idx; + } + + const uint32_t non_visible_pc_offset = (num_visible_lines / 5); + if (static_cast<size_t>(m_first_visible_line) >= num_disassembly_lines) + m_first_visible_line = 0; + + if (pc_idx < num_disassembly_lines) { + if (pc_idx < static_cast<uint32_t>(m_first_visible_line) || + pc_idx >= m_first_visible_line + num_visible_lines) + m_first_visible_line = pc_idx - non_visible_pc_offset; + } + + for (size_t i = 0; i < num_visible_lines; ++i) { + const uint32_t inst_idx = m_first_visible_line + i; + Instruction *inst = insts.GetInstructionAtIndex(inst_idx).get(); + if (!inst) + break; + + const int line_y = m_min_y + i; + window.MoveCursor(1, line_y); + const bool is_pc_line = frame_sp && inst_idx == pc_idx; + const bool line_is_selected = m_selected_line == inst_idx; + // Highlight the line as the PC line first, then if the selected line + // isn't the same as the PC line, highlight it differently + attr_t highlight_attr = 0; + attr_t bp_attr = 0; + if (is_pc_line) + highlight_attr = pc_highlight_attr; + else if (line_is_selected) + highlight_attr = selected_highlight_attr; + + if (bp_file_addrs.find(inst->GetAddress().GetFileAddress()) != + bp_file_addrs.end()) + bp_attr = COLOR_PAIR(2); + + if (bp_attr) + window.AttributeOn(bp_attr); + + window.Printf(" 0x%16.16llx ", + static_cast<unsigned long long>( + inst->GetAddress().GetLoadAddress(target))); + + if (bp_attr) + window.AttributeOff(bp_attr); + + window.PutChar(ACS_VLINE); + // Mark the line with the PC with a diamond + if (is_pc_line) + window.PutChar(ACS_DIAMOND); + else + window.PutChar(' '); + + if (highlight_attr) + window.AttributeOn(highlight_attr); + + const char *mnemonic = inst->GetMnemonic(&exe_ctx); + const char *operands = inst->GetOperands(&exe_ctx); + const char *comment = inst->GetComment(&exe_ctx); + + if (mnemonic != nullptr && mnemonic[0] == '\0') + mnemonic = nullptr; + if (operands != nullptr && operands[0] == '\0') + operands = nullptr; + if (comment != nullptr && comment[0] == '\0') + comment = nullptr; + + strm.Clear(); + + if (mnemonic != nullptr && operands != nullptr && comment != nullptr) + strm.Printf("%-8s %-25s ; %s", mnemonic, operands, comment); + else if (mnemonic != nullptr && operands != nullptr) + strm.Printf("%-8s %s", mnemonic, operands); + else if (mnemonic != nullptr) + strm.Printf("%s", mnemonic); + + int right_pad = 1; + window.PutCStringTruncated(strm.GetData(), right_pad); + + if (is_pc_line && frame_sp && + frame_sp->GetConcreteFrameIndex() == 0) { + StopInfoSP stop_info_sp; + if (thread) + stop_info_sp = thread->GetStopInfo(); + if (stop_info_sp) { + const char *stop_description = stop_info_sp->GetDescription(); + if
(stop_description && stop_description[0]) { + size_t stop_description_len = strlen(stop_description); + int desc_x = window_width - stop_description_len - 16; + window.Printf("%*s", desc_x - window.GetCursorX(), ""); + // window.MoveCursor(window_width - stop_description_len - 15, + // line_y); + window.Printf("<<< Thread %u: %s ", thread->GetIndexID(), + stop_description); + } + } else { + window.Printf("%*s", window_width - window.GetCursorX() - 1, ""); + } + } + if (highlight_attr) + window.AttributeOff(highlight_attr); + } + } + } + return true; // Drawing handled + } + + size_t GetNumLines() { + size_t num_lines = GetNumSourceLines(); + if (num_lines == 0) + num_lines = GetNumDisassemblyLines(); + return num_lines; + } + + size_t GetNumSourceLines() const { + if (m_file_sp) + return m_file_sp->GetNumLines(); + return 0; + } + + size_t GetNumDisassemblyLines() const { + if (m_disassembly_sp) + return m_disassembly_sp->GetInstructionList().GetSize(); + return 0; + } + + HandleCharResult WindowDelegateHandleChar(Window &window, int c) override { + const uint32_t num_visible_lines = NumVisibleLines(); + const size_t num_lines = GetNumLines(); + + switch (c) { + case ',': + case KEY_PPAGE: + // Page up key + if (static_cast<uint32_t>(m_first_visible_line) > num_visible_lines) + m_first_visible_line -= num_visible_lines; + else + m_first_visible_line = 0; + m_selected_line = m_first_visible_line; + return eKeyHandled; + + case '.': + case KEY_NPAGE: + // Page down key + { + if (m_first_visible_line + num_visible_lines < num_lines) + m_first_visible_line += num_visible_lines; + else if (num_lines < num_visible_lines) + m_first_visible_line = 0; + else + m_first_visible_line = num_lines - num_visible_lines; + m_selected_line = m_first_visible_line; + } + return eKeyHandled; + + case KEY_UP: + if (m_selected_line > 0) { + m_selected_line--; + if (static_cast<uint32_t>(m_first_visible_line) > m_selected_line) + m_first_visible_line = m_selected_line; + } + return eKeyHandled; + + case KEY_DOWN: + if (m_selected_line + 1 < num_lines) { + m_selected_line++; + if (m_first_visible_line + num_visible_lines < m_selected_line) + m_first_visible_line++; + } + return eKeyHandled; + + case '\r': + case '\n': + case KEY_ENTER: + // Set a breakpoint and run to the line using a one shot breakpoint + if (GetNumSourceLines() > 0) { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasProcessScope() && exe_ctx.GetProcessRef().IsAlive()) { + BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( + nullptr, // Don't limit the breakpoint to certain modules + m_file_sp->GetFileSpec(), // Source file + m_selected_line + + 1, // Source line number (m_selected_line is zero based) + 0, // Unspecified column.
+ 0, // No offset + eLazyBoolCalculate, // Check inlines using global setting + eLazyBoolCalculate, // Skip prologue using global setting, + false, // internal + false, // request_hardware + eLazyBoolCalculate); // move_to_nearest_code + // Make breakpoint one shot + bp_sp->GetOptions()->SetOneShot(true); + exe_ctx.GetProcessRef().Resume(); + } + } else if (m_selected_line < GetNumDisassemblyLines()) { + const Instruction *inst = m_disassembly_sp->GetInstructionList() + .GetInstructionAtIndex(m_selected_line) + .get(); + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasTargetScope()) { + Address addr = inst->GetAddress(); + BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( + addr, // lldb_private::Address + false, // internal + false); // request_hardware + // Make breakpoint one shot + bp_sp->GetOptions()->SetOneShot(true); + exe_ctx.GetProcessRef().Resume(); + } + } + return eKeyHandled; + + case 'b': // 'b' == toggle breakpoint on currently selected line + if (m_selected_line < GetNumSourceLines()) { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasTargetScope()) { + BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( + nullptr, // Don't limit the breakpoint to certain modules + m_file_sp->GetFileSpec(), // Source file + m_selected_line + + 1, // Source line number (m_selected_line is zero based) + 0, // No column specified. + 0, // No offset + eLazyBoolCalculate, // Check inlines using global setting + eLazyBoolCalculate, // Skip prologue using global setting, + false, // internal + false, // request_hardware + eLazyBoolCalculate); // move_to_nearest_code + } + } else if (m_selected_line < GetNumDisassemblyLines()) { + const Instruction *inst = m_disassembly_sp->GetInstructionList() + .GetInstructionAtIndex(m_selected_line) + .get(); + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasTargetScope()) { + Address addr = inst->GetAddress(); + BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( + addr, // lldb_private::Address + false, // internal + false); // request_hardware + } + } + return eKeyHandled; + + case 'd': // 'd' == detach and let run + case 'D': // 'D' == detach and keep stopped + { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasProcessScope()) + exe_ctx.GetProcessRef().Detach(c == 'D'); + } + return eKeyHandled; + + case 'k': + // 'k' == kill + { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasProcessScope()) + exe_ctx.GetProcessRef().Destroy(false); + } + return eKeyHandled; + + case 'c': + // 'c' == continue + { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasProcessScope()) + exe_ctx.GetProcessRef().Resume(); + } + return eKeyHandled; + + case 'o': + // 'o' == step out + { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasThreadScope() && + StateIsStoppedState(exe_ctx.GetProcessRef().GetState(), true)) { + exe_ctx.GetThreadRef().StepOut(); + } + } + return eKeyHandled; + + case 'n': // 'n' == step over + case 'N': // 'N' == step over instruction + { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasThreadScope() && + StateIsStoppedState(exe_ctx.GetProcessRef().GetState(), true)) { + bool 
source_step = (c == 'n'); + exe_ctx.GetThreadRef().StepOver(source_step); + } + } + return eKeyHandled; + + case 's': // 's' == step into + case 'S': // 'S' == step into instruction + { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (exe_ctx.HasThreadScope() && + StateIsStoppedState(exe_ctx.GetProcessRef().GetState(), true)) { + bool source_step = (c == 's'); + exe_ctx.GetThreadRef().StepIn(source_step); + } + } + return eKeyHandled; + + case 'h': + window.CreateHelpSubwindow(); + return eKeyHandled; + + default: + break; + } + return eKeyNotHandled; + } + +protected: + typedef std::set<uint32_t> BreakpointLines; + typedef std::set<lldb::addr_t> BreakpointAddrs; + + Debugger &m_debugger; + SymbolContext m_sc; + SourceManager::FileSP m_file_sp; + SymbolContextScope *m_disassembly_scope; + lldb::DisassemblerSP m_disassembly_sp; + AddressRange m_disassembly_range; + StreamString m_title; + lldb::user_id_t m_tid; + int m_line_width; + uint32_t m_selected_line; // The selected line + uint32_t m_pc_line; // The line with the PC + uint32_t m_stop_id; + uint32_t m_frame_idx; + int m_first_visible_line; + int m_min_x; + int m_min_y; + int m_max_x; + int m_max_y; +}; + +DisplayOptions ValueObjectListDelegate::g_options = {true}; + +IOHandlerCursesGUI::IOHandlerCursesGUI(Debugger &debugger) + : IOHandler(debugger, IOHandler::Type::Curses) {} + +void IOHandlerCursesGUI::Activate() { + IOHandler::Activate(); + if (!m_app_ap) { + m_app_ap.reset(new Application(GetInputFILE(), GetOutputFILE())); + + // This is both a window and a menu delegate + std::shared_ptr<ApplicationDelegate> app_delegate_sp( + new ApplicationDelegate(*m_app_ap, m_debugger)); + + MenuDelegateSP app_menu_delegate_sp = + std::static_pointer_cast<MenuDelegate>(app_delegate_sp); + MenuSP lldb_menu_sp( + new Menu("LLDB", "F1", KEY_F(1), ApplicationDelegate::eMenuID_LLDB)); + MenuSP exit_menuitem_sp( + new Menu("Exit", nullptr, 'x', ApplicationDelegate::eMenuID_LLDBExit)); + exit_menuitem_sp->SetCannedResult(MenuActionResult::Quit); + lldb_menu_sp->AddSubmenu(MenuSP(new Menu( + "About LLDB", nullptr, 'a', ApplicationDelegate::eMenuID_LLDBAbout))); + lldb_menu_sp->AddSubmenu(MenuSP(new Menu(Menu::Type::Separator))); + lldb_menu_sp->AddSubmenu(exit_menuitem_sp); + + MenuSP target_menu_sp(new Menu("Target", "F2", KEY_F(2), + ApplicationDelegate::eMenuID_Target)); + target_menu_sp->AddSubmenu(MenuSP(new Menu( + "Create", nullptr, 'c', ApplicationDelegate::eMenuID_TargetCreate))); + target_menu_sp->AddSubmenu(MenuSP(new Menu( + "Delete", nullptr, 'd', ApplicationDelegate::eMenuID_TargetDelete))); + + MenuSP process_menu_sp(new Menu("Process", "F3", KEY_F(3), + ApplicationDelegate::eMenuID_Process)); + process_menu_sp->AddSubmenu(MenuSP(new Menu( + "Attach", nullptr, 'a', ApplicationDelegate::eMenuID_ProcessAttach))); + process_menu_sp->AddSubmenu(MenuSP(new Menu( + "Detach", nullptr, 'd', ApplicationDelegate::eMenuID_ProcessDetach))); + process_menu_sp->AddSubmenu(MenuSP(new Menu( + "Launch", nullptr, 'l', ApplicationDelegate::eMenuID_ProcessLaunch))); + process_menu_sp->AddSubmenu(MenuSP(new Menu(Menu::Type::Separator))); + process_menu_sp->AddSubmenu( + MenuSP(new Menu("Continue", nullptr, 'c', + ApplicationDelegate::eMenuID_ProcessContinue))); + process_menu_sp->AddSubmenu(MenuSP(new Menu( + "Halt", nullptr, 'h', ApplicationDelegate::eMenuID_ProcessHalt))); + process_menu_sp->AddSubmenu(MenuSP(new Menu( + "Kill", nullptr, 'k', ApplicationDelegate::eMenuID_ProcessKill))); + + MenuSP thread_menu_sp(new Menu("Thread", "F4", KEY_F(4), +
ApplicationDelegate::eMenuID_Thread)); + thread_menu_sp->AddSubmenu(MenuSP(new Menu( + "Step In", nullptr, 'i', ApplicationDelegate::eMenuID_ThreadStepIn))); + thread_menu_sp->AddSubmenu( + MenuSP(new Menu("Step Over", nullptr, 'v', + ApplicationDelegate::eMenuID_ThreadStepOver))); + thread_menu_sp->AddSubmenu(MenuSP(new Menu( + "Step Out", nullptr, 'o', ApplicationDelegate::eMenuID_ThreadStepOut))); + + MenuSP view_menu_sp( + new Menu("View", "F5", KEY_F(5), ApplicationDelegate::eMenuID_View)); + view_menu_sp->AddSubmenu( + MenuSP(new Menu("Backtrace", nullptr, 'b', + ApplicationDelegate::eMenuID_ViewBacktrace))); + view_menu_sp->AddSubmenu( + MenuSP(new Menu("Registers", nullptr, 'r', + ApplicationDelegate::eMenuID_ViewRegisters))); + view_menu_sp->AddSubmenu(MenuSP(new Menu( + "Source", nullptr, 's', ApplicationDelegate::eMenuID_ViewSource))); + view_menu_sp->AddSubmenu( + MenuSP(new Menu("Variables", nullptr, 'v', + ApplicationDelegate::eMenuID_ViewVariables))); + + MenuSP help_menu_sp( + new Menu("Help", "F6", KEY_F(6), ApplicationDelegate::eMenuID_Help)); + help_menu_sp->AddSubmenu(MenuSP(new Menu( + "GUI Help", nullptr, 'g', ApplicationDelegate::eMenuID_HelpGUIHelp))); + + m_app_ap->Initialize(); + WindowSP &main_window_sp = m_app_ap->GetMainWindow(); + + MenuSP menubar_sp(new Menu(Menu::Type::Bar)); + menubar_sp->AddSubmenu(lldb_menu_sp); + menubar_sp->AddSubmenu(target_menu_sp); + menubar_sp->AddSubmenu(process_menu_sp); + menubar_sp->AddSubmenu(thread_menu_sp); + menubar_sp->AddSubmenu(view_menu_sp); + menubar_sp->AddSubmenu(help_menu_sp); + menubar_sp->SetDelegate(app_menu_delegate_sp); + + Rect content_bounds = main_window_sp->GetFrame(); + Rect menubar_bounds = content_bounds.MakeMenuBar(); + Rect status_bounds = content_bounds.MakeStatusBar(); + Rect source_bounds; + Rect variables_bounds; + Rect threads_bounds; + Rect source_variables_bounds; + content_bounds.VerticalSplitPercentage(0.80, source_variables_bounds, + threads_bounds); + source_variables_bounds.HorizontalSplitPercentage(0.70, source_bounds, + variables_bounds); + + WindowSP menubar_window_sp = + main_window_sp->CreateSubWindow("Menubar", menubar_bounds, false); + // Let the menubar get keys if the active window doesn't handle the keys + // that are typed so it can respond to menubar key presses. 
+ menubar_window_sp->SetCanBeActive( + false); // Don't let the menubar become the active window + menubar_window_sp->SetDelegate(menubar_sp); + + WindowSP source_window_sp( + main_window_sp->CreateSubWindow("Source", source_bounds, true)); + WindowSP variables_window_sp( + main_window_sp->CreateSubWindow("Variables", variables_bounds, false)); + WindowSP threads_window_sp( + main_window_sp->CreateSubWindow("Threads", threads_bounds, false)); + WindowSP status_window_sp( + main_window_sp->CreateSubWindow("Status", status_bounds, false)); + status_window_sp->SetCanBeActive( + false); // Don't let the status bar become the active window + main_window_sp->SetDelegate( + std::static_pointer_cast<WindowDelegate>(app_delegate_sp)); + source_window_sp->SetDelegate( + WindowDelegateSP(new SourceFileWindowDelegate(m_debugger))); + variables_window_sp->SetDelegate( + WindowDelegateSP(new FrameVariablesWindowDelegate(m_debugger))); + TreeDelegateSP thread_delegate_sp(new ThreadsTreeDelegate(m_debugger)); + threads_window_sp->SetDelegate(WindowDelegateSP( + new TreeWindowDelegate(m_debugger, thread_delegate_sp))); + status_window_sp->SetDelegate( + WindowDelegateSP(new StatusBarWindowDelegate(m_debugger))); + + // Show the main help window once the first time the curses GUI is launched + static bool g_showed_help = false; + if (!g_showed_help) { + g_showed_help = true; + main_window_sp->CreateHelpSubwindow(); + } + + init_pair(1, COLOR_WHITE, COLOR_BLUE); + init_pair(2, COLOR_BLACK, COLOR_WHITE); + init_pair(3, COLOR_MAGENTA, COLOR_WHITE); + init_pair(4, COLOR_MAGENTA, COLOR_BLACK); + init_pair(5, COLOR_RED, COLOR_BLACK); + } +} + +void IOHandlerCursesGUI::Deactivate() { m_app_ap->Terminate(); } + +void IOHandlerCursesGUI::Run() { + m_app_ap->Run(m_debugger); + SetIsDone(true); +} + +IOHandlerCursesGUI::~IOHandlerCursesGUI() = default; + +void IOHandlerCursesGUI::Cancel() {} + +bool IOHandlerCursesGUI::Interrupt() { return false; } + +void IOHandlerCursesGUI::GotEOF() {} + +#endif // LLDB_DISABLE_CURSES diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index a14bd3d370a1b..cc4eea674170b 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -613,11 +613,10 @@ void Module::FindCompileUnits(const FileSpec &path, const size_t num_compile_units = GetNumCompileUnits(); SymbolContext sc; sc.module_sp = shared_from_this(); - const bool compare_directory = (bool)path.GetDirectory(); for (size_t i = 0; i < num_compile_units; ++i) { sc.comp_unit = GetCompileUnitAtIndex(i).get(); if (sc.comp_unit) { - if (FileSpec::Equal(*sc.comp_unit, path, compare_directory)) + if (FileSpec::Match(path, sc.comp_unit->GetPrimaryFile())) sc_list.Append(sc); } } @@ -1060,34 +1059,35 @@ std::string Module::GetSpecificationDescription() const { return spec; } -void Module::GetDescription(Stream *s, lldb::DescriptionLevel level) { +void Module::GetDescription(llvm::raw_ostream &s, + lldb::DescriptionLevel level) { std::lock_guard<std::recursive_mutex> guard(m_mutex); if (level >= eDescriptionLevelFull) { if (m_arch.IsValid()) - s->Printf("(%s) ", m_arch.GetArchitectureName()); + s << llvm::formatv("({0}) ", m_arch.GetArchitectureName()); } if (level == eDescriptionLevelBrief) { const char *filename = m_file.GetFilename().GetCString(); if (filename) - s->PutCString(filename); + s << filename; } else { char path[PATH_MAX]; if (m_file.GetPath(path, sizeof(path))) - s->PutCString(path); + s << path; } const char *object_name = m_object_name.GetCString(); if (object_name) - s->Printf("(%s)", object_name); + s <<
llvm::formatv("({0})", object_name); } void Module::ReportError(const char *format, ...) { if (format && format[0]) { StreamString strm; strm.PutCString("error: "); - GetDescription(&strm, lldb::eDescriptionLevelBrief); + GetDescription(strm.AsRawOstream(), lldb::eDescriptionLevelBrief); strm.PutChar(' '); va_list args; va_start(args, format); @@ -1118,7 +1118,7 @@ void Module::ReportErrorIfModifyDetected(const char *format, ...) { if (format) { StreamString strm; strm.PutCString("error: the object file "); - GetDescription(&strm, lldb::eDescriptionLevelFull); + GetDescription(strm.AsRawOstream(), lldb::eDescriptionLevelFull); strm.PutCString(" has been modified\n"); va_list args; @@ -1144,7 +1144,7 @@ void Module::ReportWarning(const char *format, ...) { if (format && format[0]) { StreamString strm; strm.PutCString("warning: "); - GetDescription(&strm, lldb::eDescriptionLevelFull); + GetDescription(strm.AsRawOstream(), lldb::eDescriptionLevelFull); strm.PutChar(' '); va_list args; @@ -1165,7 +1165,7 @@ void Module::ReportWarning(const char *format, ...) { void Module::LogMessage(Log *log, const char *format, ...) { if (log != nullptr) { StreamString log_message; - GetDescription(&log_message, lldb::eDescriptionLevelFull); + GetDescription(log_message.AsRawOstream(), lldb::eDescriptionLevelFull); log_message.PutCString(": "); va_list args; va_start(args, format); @@ -1178,7 +1178,7 @@ void Module::LogMessage(Log *log, const char *format, ...) { void Module::LogMessageVerboseBacktrace(Log *log, const char *format, ...) { if (log != nullptr) { StreamString log_message; - GetDescription(&log_message, lldb::eDescriptionLevelFull); + GetDescription(log_message.AsRawOstream(), lldb::eDescriptionLevelFull); log_message.PutCString(": "); va_list args; va_start(args, format); @@ -1559,19 +1559,13 @@ bool Module::MatchesModuleSpec(const ModuleSpec &module_ref) { } const FileSpec &file_spec = module_ref.GetFileSpec(); - if (file_spec) { - if (!FileSpec::Equal(file_spec, m_file, (bool)file_spec.GetDirectory()) && - !FileSpec::Equal(file_spec, m_platform_file, - (bool)file_spec.GetDirectory())) - return false; - } + if (!FileSpec::Match(file_spec, m_file) && + !FileSpec::Match(file_spec, m_platform_file)) + return false; const FileSpec &platform_file_spec = module_ref.GetPlatformFileSpec(); - if (platform_file_spec) { - if (!FileSpec::Equal(platform_file_spec, GetPlatformFileSpec(), - (bool)platform_file_spec.GetDirectory())) - return false; - } + if (!FileSpec::Match(platform_file_spec, GetPlatformFileSpec())) + return false; const ArchSpec &arch = module_ref.GetArchitecture(); if (arch.IsValid()) { diff --git a/lldb/source/Core/SearchFilter.cpp b/lldb/source/Core/SearchFilter.cpp index 8f80caa3eb4de..077aa89674253 100644 --- a/lldb/source/Core/SearchFilter.cpp +++ b/lldb/source/Core/SearchFilter.cpp @@ -403,13 +403,11 @@ SearchFilterByModule::~SearchFilterByModule() = default; bool SearchFilterByModule::ModulePasses(const ModuleSP &module_sp) { return (module_sp && - FileSpec::Equal(module_sp->GetFileSpec(), m_module_spec, false)); + FileSpec::Match(m_module_spec, module_sp->GetFileSpec())); } bool SearchFilterByModule::ModulePasses(const FileSpec &spec) { - // Do a full match only if "spec" has a directory - const bool full_match = (bool)spec.GetDirectory(); - return FileSpec::Equal(spec, m_module_spec, full_match); + return FileSpec::Match(m_module_spec, spec); } bool SearchFilterByModule::AddressPasses(Address &address) { @@ -443,8 +441,7 @@ void SearchFilterByModule::Search(Searcher &searcher) { 
const size_t num_modules = target_modules.GetSize(); for (size_t i = 0; i < num_modules; i++) { Module *module = target_modules.GetModulePointerAtIndexUnlocked(i); - const bool full_match = (bool)m_module_spec.GetDirectory(); - if (FileSpec::Equal(m_module_spec, module->GetFileSpec(), full_match)) { + if (FileSpec::Match(m_module_spec, module->GetFileSpec())) { SymbolContext matchingContext(m_target_sp, module->shared_from_this()); Searcher::CallbackReturn shouldContinue; @@ -726,8 +723,11 @@ bool SearchFilterByModuleListAndCU::AddressPasses(Address &address) { if (m_cu_spec_list.GetSize() != 0) return false; // Has no comp_unit so can't pass the file check. } - if (m_cu_spec_list.FindFileIndex(0, sym_ctx.comp_unit, false) == UINT32_MAX) - return false; // Fails the file check + FileSpec cu_spec; + if (sym_ctx.comp_unit) + cu_spec = sym_ctx.comp_unit->GetPrimaryFile(); + if (m_cu_spec_list.FindFileIndex(0, cu_spec, false) == UINT32_MAX) + return false; // Fails the file check return SearchFilterByModuleList::ModulePasses(sym_ctx.module_sp); } @@ -736,8 +736,8 @@ bool SearchFilterByModuleListAndCU::CompUnitPasses(FileSpec &fileSpec) { } bool SearchFilterByModuleListAndCU::CompUnitPasses(CompileUnit &compUnit) { - bool in_cu_list = - m_cu_spec_list.FindFileIndex(0, compUnit, false) != UINT32_MAX; + bool in_cu_list = m_cu_spec_list.FindFileIndex(0, compUnit.GetPrimaryFile(), + false) != UINT32_MAX; if (in_cu_list) { ModuleSP module_sp(compUnit.GetModule()); if (module_sp) { @@ -787,8 +787,9 @@ void SearchFilterByModuleListAndCU::Search(Searcher &searcher) { CompUnitSP cu_sp = module_sp->GetCompileUnitAtIndex(cu_idx); matchingContext.comp_unit = cu_sp.get(); if (matchingContext.comp_unit) { - if (m_cu_spec_list.FindFileIndex(0, *matchingContext.comp_unit, - false) != UINT32_MAX) { + if (m_cu_spec_list.FindFileIndex( + 0, matchingContext.comp_unit->GetPrimaryFile(), false) != + UINT32_MAX) { shouldContinue = DoCUIteration(module_sp, matchingContext, searcher); if (shouldContinue == Searcher::eCallbackReturnStop) diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp index 7615dc1d65c7f..e8fcca4603dfb 100644 --- a/lldb/source/Core/Section.cpp +++ b/lldb/source/Core/Section.cpp @@ -94,6 +94,8 @@ const char *Section::GetTypeAsCString() const { return "dwarf-ranges"; case eSectionTypeDWARFDebugRngLists: return "dwarf-rnglists"; + case eSectionTypeDWARFDebugRngListsDwo: + return "dwarf-rnglists-dwo"; case eSectionTypeDWARFDebugStr: return "dwarf-str"; case eSectionTypeDWARFDebugStrDwo: diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp index 42741e4ba4fe4..8e0cc57f80c11 100644 --- a/lldb/source/Core/SourceManager.cpp +++ b/lldb/source/Core/SourceManager.cpp @@ -64,7 +64,8 @@ SourceManager::~SourceManager() {} SourceManager::FileSP SourceManager::GetFile(const FileSpec &file_spec) { bool same_as_previous = - m_last_file_sp && m_last_file_sp->FileSpecMatches(file_spec); + m_last_file_sp && + FileSpec::Match(file_spec, m_last_file_sp->GetFileSpec()); DebuggerSP debugger_sp(m_debugger_wp.lock()); FileSP file_sp; @@ -399,24 +400,25 @@ void SourceManager::File::CommonInitializer(const FileSpec &file_spec, if (num_matches != 0) { if (num_matches > 1) { SymbolContext sc; - FileSpec *test_cu_spec = nullptr; + CompileUnit *test_cu = nullptr; for (unsigned i = 0; i < num_matches; i++) { sc_list.GetContextAtIndex(i, sc); if (sc.comp_unit) { - if (test_cu_spec) { - if (test_cu_spec != static_cast(sc.comp_unit)) + if (test_cu) { + if (test_cu != sc.comp_unit) 
got_multiple = true; break; } else - test_cu_spec = sc.comp_unit; + test_cu = sc.comp_unit; } } } if (!got_multiple) { SymbolContext sc; sc_list.GetContextAtIndex(0, sc); - m_file_spec = sc.comp_unit; + if (sc.comp_unit) + m_file_spec = sc.comp_unit->GetPrimaryFile(); m_mod_time = FileSystem::Instance().GetModificationTime(m_file_spec); } } @@ -601,10 +603,6 @@ void SourceManager::File::FindLinesMatchingRegex( } } -bool SourceManager::File::FileSpecMatches(const FileSpec &file_spec) { - return FileSpec::Equal(m_file_spec, file_spec, false); -} - bool lldb_private::operator==(const SourceManager::File &lhs, const SourceManager::File &rhs) { if (lhs.m_file_spec != rhs.m_file_spec) diff --git a/lldb/source/Core/ValueObjectSyntheticFilter.cpp b/lldb/source/Core/ValueObjectSyntheticFilter.cpp index a6bf35eac70a2..a30be1b083384 100644 --- a/lldb/source/Core/ValueObjectSyntheticFilter.cpp +++ b/lldb/source/Core/ValueObjectSyntheticFilter.cpp @@ -48,8 +48,9 @@ class DummySyntheticFrontEnd : public SyntheticChildrenFrontEnd { ValueObjectSynthetic::ValueObjectSynthetic(ValueObject &parent, lldb::SyntheticChildrenSP filter) : ValueObject(parent), m_synth_sp(filter), m_children_byindex(), - m_name_toindex(), m_synthetic_children_count(UINT32_MAX), - m_synthetic_children_cache(), m_parent_type_name(parent.GetTypeName()), + m_name_toindex(), m_synthetic_children_cache(), + m_synthetic_children_count(UINT32_MAX), + m_parent_type_name(parent.GetTypeName()), m_might_have_children(eLazyBoolCalculate), m_provides_value(eLazyBoolCalculate) { SetName(parent.GetName()); @@ -177,14 +178,20 @@ bool ValueObjectSynthetic::UpdateValue() { "filter said caches are stale - clearing", GetName().AsCString()); // filter said that cached values are stale - m_children_byindex.Clear(); - m_name_toindex.Clear(); + { + std::lock_guard<std::mutex> guard(m_child_mutex); + m_children_byindex.clear(); + m_name_toindex.clear(); + } // usually, an object's value can change but this does not alter its // children count for a synthetic VO that might indeed happen, so we need // to tell the upper echelons that they need to come back to us asking for // children m_children_count_valid = false; - m_synthetic_children_cache.Clear(); + { + std::lock_guard<std::mutex> guard(m_child_mutex); + m_synthetic_children_cache.clear(); + } + m_synthetic_children_count = UINT32_MAX; + m_might_have_children = eLazyBoolCalculate; } else { @@ -232,7 +239,16 @@ lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, UpdateValueIfNeeded(); ValueObject *valobj; - if (!m_children_byindex.GetValueForKey(idx, valobj)) { + bool child_is_cached; + { + std::lock_guard<std::mutex> guard(m_child_mutex); + auto cached_child_it = m_children_byindex.find(idx); + child_is_cached = cached_child_it != m_children_byindex.end(); + if (child_is_cached) + valobj = cached_child_it->second; + } + + if (!child_is_cached) { + if (can_create && m_synth_filter_up != nullptr) { + LLDB_LOGF(log, "[ValueObjectSynthetic::GetChildAtIndex] name=%s, child at " @@ -254,9 +270,12 @@ lldb::ValueObjectSP ValueObjectSynthetic::GetChildAtIndex(size_t idx, if (!synth_guy) return synth_guy; - if (synth_guy->IsSyntheticChildrenGenerated()) - m_synthetic_children_cache.AppendObject(synth_guy); - m_children_byindex.SetValueForKey(idx, synth_guy.get()); + { + std::lock_guard<std::mutex> guard(m_child_mutex); + if (synth_guy->IsSyntheticChildrenGenerated()) + m_synthetic_children_cache.push_back(synth_guy); + m_children_byindex[idx] = synth_guy.get(); + } + synth_guy->SetPreferredDisplayLanguageIfNeeded( + GetPreferredDisplayLanguage()); + return
synth_guy; @@ -297,13 +316,21 @@ size_t ValueObjectSynthetic::GetIndexOfChildWithName(ConstString name) { UpdateValueIfNeeded(); uint32_t found_index = UINT32_MAX; - bool did_find = m_name_toindex.GetValueForKey(name.GetCString(), found_index); + bool did_find; + { + std::lock_guard<std::mutex> guard(m_child_mutex); + auto name_to_index = m_name_toindex.find(name.GetCString()); + did_find = name_to_index != m_name_toindex.end(); + if (did_find) + found_index = name_to_index->second; + } if (!did_find && m_synth_filter_up != nullptr) { uint32_t index = m_synth_filter_up->GetIndexOfChildWithName(name); if (index == UINT32_MAX) return index; - m_name_toindex.SetValueForKey(name.GetCString(), index); + std::lock_guard<std::mutex> guard(m_child_mutex); + m_name_toindex[name.GetCString()] = index; return index; } else if (!did_find && m_synth_filter_up == nullptr) return UINT32_MAX; diff --git a/lldb/source/DataFormatters/DataVisualization.cpp b/lldb/source/DataFormatters/DataVisualization.cpp index 08b3b34447bba..e73d44f60f03f 100644 --- a/lldb/source/DataFormatters/DataVisualization.cpp +++ b/lldb/source/DataFormatters/DataVisualization.cpp @@ -122,8 +122,7 @@ void DataVisualization::Categories::Enable(ConstString category, TypeCategoryMap::Position pos) { if (GetFormatManager().GetCategory(category)->IsEnabled()) GetFormatManager().DisableCategory(category); - GetFormatManager().EnableCategory( - category, pos, std::initializer_list<lldb::LanguageType>()); + GetFormatManager().EnableCategory(category, pos, {}); } void DataVisualization::Categories::Enable(lldb::LanguageType lang_type) { diff --git a/lldb/source/DataFormatters/TypeCategory.cpp b/lldb/source/DataFormatters/TypeCategory.cpp index fed2dfb3c7c5b..be3b31603eac4 100644 --- a/lldb/source/DataFormatters/TypeCategory.cpp +++ b/lldb/source/DataFormatters/TypeCategory.cpp @@ -13,18 +13,14 @@ using namespace lldb; using namespace lldb_private; -TypeCategoryImpl::TypeCategoryImpl( - IFormatChangeListener *clist, ConstString name, - std::initializer_list<lldb::LanguageType> langs) +TypeCategoryImpl::TypeCategoryImpl(IFormatChangeListener *clist, + ConstString name) : m_format_cont("format", "regex-format", clist), m_summary_cont("summary", "regex-summary", clist), m_filter_cont("filter", "regex-filter", clist), m_synth_cont("synth", "regex-synth", clist), m_validator_cont("validator", "regex-validator", clist), m_enabled(false), - m_change_listener(clist), m_mutex(), m_name(name), m_languages() { - for (const lldb::LanguageType lang : langs) - AddLanguage(lang); -} + m_change_listener(clist), m_mutex(), m_name(name), m_languages() {} static bool IsApplicable(lldb::LanguageType category_lang, lldb::LanguageType valobj_lang) { @@ -90,12 +86,6 @@ void TypeCategoryImpl::AddLanguage(lldb::LanguageType lang) { m_languages.push_back(lang); } -bool TypeCategoryImpl::HasLanguage(lldb::LanguageType lang) { - const auto iter = std::find(m_languages.begin(), m_languages.end(), lang), - end = m_languages.end(); - return (iter != end); -} - bool TypeCategoryImpl::Get(ValueObject &valobj, const FormattersMatchVector &candidates, lldb::TypeFormatImplSP &entry, uint32_t *reason) { diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index a063da0f4e401..8947500959cbd 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -146,7 +146,7 @@ void DWARFExpression::GetDescription(Stream *s, lldb::DescriptionLevel level, // We have a new base address if (count > 0) s->PutCString(", "); - *s << "base_addr = " << end_addr_offset;
+ s->Format("base_addr = {0:x}", end_addr_offset); + } } diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp index 3ae837866faf1..b29c218f0369b 100644 --- a/lldb/source/Host/common/Editline.cpp +++ b/lldb/source/Host/common/Editline.cpp @@ -97,6 +97,33 @@ bool IsOnlySpaces(const EditLineStringType &content) { return true; } +static int GetOperation(HistoryOperation op) { + // The naming used by editline for the history operations is counter + // intuitive to how it's used here. + // + // - The H_PREV operation returns the previous element in the history, which + // is newer than the current one. + // + // - The H_NEXT operation returns the next element in the history, which is + // older than the current one. + // + // The naming of the enum entries match the semantic meaning. + switch(op) { + case HistoryOperation::Oldest: + return H_FIRST; + case HistoryOperation::Older: + return H_NEXT; + case HistoryOperation::Current: + return H_CURR; + case HistoryOperation::Newer: + return H_PREV; + case HistoryOperation::Newest: + return H_LAST; + } + llvm_unreachable("Fully covered switch!"); +} + + EditLineStringType CombineLines(const std::vector<EditLineStringType> &lines) { EditLineStringStreamType combined_stream; for (EditLineStringType line : lines) { @@ -423,7 +450,8 @@ StringList Editline::GetInputAsStringList(int line_count) { return lines; } -unsigned char Editline::RecallHistory(bool earlier) { +unsigned char Editline::RecallHistory(HistoryOperation op) { + assert(op == HistoryOperation::Older || op == HistoryOperation::Newer); if (!m_history_sp || !m_history_sp->IsValid()) return CC_ERROR; @@ -433,27 +461,38 @@ unsigned char Editline::RecallHistory(bool earlier) { // Treat moving from the "live" entry differently if (!m_in_history) { - if (!earlier) + switch (op) { + case HistoryOperation::Newer: return CC_ERROR; // Can't go newer than the "live" entry - if (history_w(pHistory, &history_event, H_FIRST) == -1) - return CC_ERROR; - - // Save any edits to the "live" entry in case we return by moving forward - // in history (it would be more bash-like to save over any current entry, - // but libedit doesn't offer the ability to add entries anywhere except the - // end.) - SaveEditedLine(); - m_live_history_lines = m_input_lines; - m_in_history = true; + case HistoryOperation::Older: { + if (history_w(pHistory, &history_event, + GetOperation(HistoryOperation::Newest)) == -1) + return CC_ERROR; + // Save any edits to the "live" entry in case we return by moving forward + // in history (it would be more bash-like to save over any current entry, + // but libedit doesn't offer the ability to add entries anywhere except + // the end.) + SaveEditedLine(); + m_live_history_lines = m_input_lines; + m_in_history = true; + } break; + default: + llvm_unreachable("unsupported history direction"); + } } else { - if (history_w(pHistory, &history_event, earlier ? H_PREV : H_NEXT) == -1) { - // Can't move earlier than the earliest entry - if (earlier) + if (history_w(pHistory, &history_event, GetOperation(op)) == -1) { + switch (op) { + case HistoryOperation::Older: + // Can't move earlier than the earliest entry. return CC_ERROR; - - // ... but moving to newer than the newest yields the "live" entry - new_input_lines = m_live_history_lines; - m_in_history = false; + case HistoryOperation::Newer: + // Moving to newer-than-the-newest entry yields the "live" entry.
+ new_input_lines = m_live_history_lines; + m_in_history = false; + break; + default: + llvm_unreachable("unsupported history direction"); + } } } @@ -468,8 +507,17 @@ unsigned char Editline::RecallHistory(bool earlier) { // Prepare to edit the last line when moving to previous entry, or the first // line when moving to next entry - SetCurrentLine(m_current_line_index = - earlier ? (int)m_input_lines.size() - 1 : 0); + switch (op) { + case HistoryOperation::Older: + m_current_line_index = (int)m_input_lines.size() - 1; + break; + case HistoryOperation::Newer: + m_current_line_index = 0; + break; + default: + llvm_unreachable("unsupported history direction"); + } + SetCurrentLine(m_current_line_index); MoveCursor(CursorLocation::BlockEnd, CursorLocation::EditingPrompt); return CC_NEWLINE; } @@ -721,7 +769,7 @@ unsigned char Editline::PreviousLineCommand(int ch) { SaveEditedLine(); if (m_current_line_index == 0) { - return RecallHistory(true); + return RecallHistory(HistoryOperation::Older); } // Start from a known location @@ -747,7 +795,7 @@ unsigned char Editline::NextLineCommand(int ch) { // Don't add an extra line if the existing last line is blank, move through // history instead if (IsOnlySpaces()) { - return RecallHistory(false); + return RecallHistory(HistoryOperation::Newer); } // Determine indentation for the new line @@ -779,13 +827,13 @@ unsigned char Editline::NextLineCommand(int ch) { unsigned char Editline::PreviousHistoryCommand(int ch) { SaveEditedLine(); - return RecallHistory(true); + return RecallHistory(HistoryOperation::Older); } unsigned char Editline::NextHistoryCommand(int ch) { SaveEditedLine(); - return RecallHistory(false); + return RecallHistory(HistoryOperation::Newer); } unsigned char Editline::FixIndentationCommand(int ch) { diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm index 8c7393739bc68..03880ff433bd6 100644 --- a/lldb/source/Host/macosx/objcxx/Host.mm +++ b/lldb/source/Host/macosx/objcxx/Host.mm @@ -1130,7 +1130,7 @@ static Status LaunchProcessPosixSpawn(const char *exe_path, // --arch as part of the shell invocation // to do that job on OSX. - if (launch_info.GetShell() == nullptr) { + if (launch_info.GetShell() == FileSpec()) { // We don't need to do this for ARM, and we really shouldn't now that we // have multiple CPU subtypes and no posix_spawnattr call that allows us // to set which CPU subtype to launch... 
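A note on the Editline change above: libedit's H_PREV/H_NEXT macros are named relative to insertion order, so H_PREV walks toward newer entries and H_NEXT toward older ones, which is the inversion the GetOperation() comment describes. The standalone sketch below (the H_* values here are stand-ins for the histedit.h macros, and the assertions are only illustrative, so this is not part of the patch) shows the shape of that mapping and why call sites read better with the enum:

    // Standalone C++ sketch of the GetOperation()-style mapping.
    #include <cassert>

    // Stand-ins for the histedit.h macros; only their identities matter here.
    enum { H_FIRST, H_NEXT, H_CURR, H_PREV, H_LAST };

    enum class HistoryOperation { Oldest, Older, Current, Newer, Newest };

    static int GetOperation(HistoryOperation op) {
      switch (op) {
      case HistoryOperation::Oldest:
        return H_FIRST;
      case HistoryOperation::Older:
        return H_NEXT; // editline's "next" entry is the older one
      case HistoryOperation::Current:
        return H_CURR;
      case HistoryOperation::Newer:
        return H_PREV; // editline's "previous" entry is the newer one
      case HistoryOperation::Newest:
        return H_LAST;
      }
      return H_CURR; // not reached; every enumerator is handled above
    }

    int main() {
      // RecallHistory(HistoryOperation::Older) states the direction directly,
      // where the old RecallHistory(true) left "true" ambiguous at call sites.
      assert(GetOperation(HistoryOperation::Newer) == H_PREV);
      assert(GetOperation(HistoryOperation::Older) == H_NEXT);
      return 0;
    }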
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index e022481484135..5a4e466144a6f 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -362,10 +362,23 @@ void CommandInterpreter::Initialize() { "controlled by the type's author."); po->SetHelpLong(""); } - AddAlias("parray", cmd_obj_sp, "--element-count %1 --")->SetHelpLong(""); - AddAlias("poarray", cmd_obj_sp, - "--object-description --element-count %1 --") - ->SetHelpLong(""); + CommandAlias *parray_alias = AddAlias("parray", cmd_obj_sp, + "--element-count %1 --"); + if (parray_alias) { + parray_alias->SetHelp + ("parray <COUNT> <EXPRESSION> -- lldb will evaluate EXPRESSION " + "to get a typed-pointer-to-an-array in memory, and will display " + "COUNT elements of that type from the array."); + parray_alias->SetHelpLong(""); + } + CommandAlias *poarray_alias = AddAlias("poarray", cmd_obj_sp, + "--object-description --element-count %1 --"); + if (poarray_alias) { + poarray_alias->SetHelp("poarray <COUNT> <EXPRESSION> -- lldb will " + "evaluate EXPRESSION to get the address of an array of COUNT " + "objects in memory, and will call po on them."); + poarray_alias->SetHelpLong(""); + } } cmd_obj_sp = GetCommandSPExact("process kill", false); diff --git a/lldb/source/Interpreter/OptionGroupPythonClassWithDict.cpp b/lldb/source/Interpreter/OptionGroupPythonClassWithDict.cpp index 20a7ed1f76ca3..e41f9d7b40ee5 100644 --- a/lldb/source/Interpreter/OptionGroupPythonClassWithDict.cpp +++ b/lldb/source/Interpreter/OptionGroupPythonClassWithDict.cpp @@ -127,6 +127,7 @@ void OptionGroupPythonClassWithDict::OptionParsingStarting( // the user didn't pass any -k -v pairs. We want to be able to warn if these // were passed when the function they passed won't use them. m_dict_sp.reset(); + m_name.clear(); } Status OptionGroupPythonClassWithDict::OptionParsingFinished( diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp index 654585cb35eba..fb8b48cc108be 100644 --- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp @@ -734,7 +734,7 @@ bool DynamicLoaderMacOSXDYLD::InitializeFromAllImageInfos() { if (!module_sp->IsLoadedInTarget(&target)) { if (log) { StreamString s; - module_sp->GetDescription(&s); + module_sp->GetDescription(s.AsRawOstream()); LLDB_LOGF(log, "Unloading pre-run module: %s.", s.GetData()); } not_loaded_modules.Append(module_sp); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp index 7440f6a0c3636..51540902e2dcc 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp @@ -57,10 +57,11 @@ ClangASTSource::ClangASTSource(const lldb::TargetSP &target) } } -void ClangASTSource::InstallASTContext(clang::ASTContext &ast_context, +void ClangASTSource::InstallASTContext(ClangASTContext &clang_ast_context, clang::FileManager &file_manager, bool is_shared_context) { - m_ast_context = &ast_context; + m_ast_context = clang_ast_context.getASTContext(); + m_clang_ast_context = &clang_ast_context; m_file_manager = &file_manager; if (m_target->GetUseModernTypeLookup()) { // Configure the ExternalASTMerger.
The merger needs to be able to import @@ -69,7 +70,7 @@ void ClangASTSource::InstallASTContext(clang::ASTContext &ast_context, // AST contexts. lldbassert(!m_merger_up); - clang::ExternalASTMerger::ImporterTarget target = {ast_context, + clang::ExternalASTMerger::ImporterTarget target = {*m_ast_context, file_manager}; std::vector<clang::ExternalASTMerger::ImporterSource> sources; for (lldb::ModuleSP module_sp : m_target->GetImages().Modules()) { @@ -132,7 +133,7 @@ void ClangASTSource::InstallASTContext(clang::ASTContext &ast_context, m_merger_up = std::make_unique<clang::ExternalASTMerger>(target, sources); } else { - m_ast_importer_sp->InstallMapCompleter(&ast_context, *this); + m_ast_importer_sp->InstallMapCompleter(m_ast_context, *this); } } @@ -363,7 +364,6 @@ void ClangASTSource::CompleteType(TagDecl *tag_decl) { TypeList types; ConstString name(tag_decl->getName().str().c_str()); - CompilerDeclContext namespace_decl; const ModuleList &module_list = m_target->GetImages(); @@ -776,7 +776,7 @@ void ClangASTSource::FindExternalVisibleDecls(NameSearchContext &context) { } clang::Sema *ClangASTSource::getSema() { - return ClangASTContext::GetASTContext(m_ast_context)->getSema(); + return m_clang_ast_context->getSema(); } bool ClangASTSource::IgnoreName(const ConstString name, @@ -2059,8 +2059,7 @@ CompilerType ClangASTSource::GuardedCopyType(const CompilerType &src_type) { // seems to be generating bad types on occasion. return CompilerType(); - return CompilerType(ClangASTContext::GetASTContext(m_ast_context), - copied_qual_type.getAsOpaquePtr()); + return CompilerType(m_clang_ast_context, copied_qual_type.getAsOpaquePtr()); } clang::NamedDecl *NameSearchContext::AddVarDecl(const CompilerType &type) { @@ -2187,10 +2186,9 @@ clang::NamedDecl *NameSearchContext::AddGenericFunDecl() { ArrayRef<QualType>(), // argument types proto_info)); - return AddFunDecl( - CompilerType(ClangASTContext::GetASTContext(m_ast_source.m_ast_context), - generic_function_type.getAsOpaquePtr()), - true); + return AddFunDecl(CompilerType(m_ast_source.m_clang_ast_context, + generic_function_type.getAsOpaquePtr()), + true); } clang::NamedDecl * diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h index d8e784f49b10e..194233e4a028e 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h @@ -57,7 +57,7 @@ class ClangASTSource : public ClangExternalASTSourceCommon, } void MaterializeVisibleDecls(const clang::DeclContext *DC) { return; } - void InstallASTContext(clang::ASTContext &ast_context, + void InstallASTContext(ClangASTContext &ast_context, clang::FileManager &file_manager, bool is_shared_context = false); @@ -408,6 +408,8 @@ class ClangASTSource : public ClangExternalASTSourceCommon, const lldb::TargetSP m_target; /// The AST context requests are coming in for. clang::ASTContext *m_ast_context; + /// The ClangASTContext for m_ast_context. + ClangASTContext *m_clang_ast_context; /// The file manager paired with the AST context. clang::FileManager *m_file_manager; /// The target's AST importer.
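The ClangASTSource changes above replace repeated reverse lookups of the form ClangASTContext::GetASTContext(m_ast_context) with a cached pointer to the owning ClangASTContext, from which the raw clang::ASTContext is derived once at installation time. A minimal sketch of that pattern, using hypothetical stand-in types rather than the real LLDB and Clang classes:

    // C++ sketch; RawASTContext/WrapperASTContext are stand-ins for
    // clang::ASTContext and lldb's ClangASTContext wrapper.
    struct RawASTContext {};
    struct WrapperASTContext {
      RawASTContext *getASTContext() { return &m_ctx; }
      RawASTContext m_ctx;
    };

    class Source {
    public:
      // The wrapper is handed in once; the raw context is derived from it,
      // so later accesses (like getSema() in the patch) never need to map a
      // raw context back to its wrapper.
      void InstallASTContext(WrapperASTContext &wrapper) {
        m_clang_ast_context = &wrapper;
        m_ast_context = wrapper.getASTContext();
      }

    private:
      RawASTContext *m_ast_context = nullptr;           // derived raw pointer
      WrapperASTContext *m_clang_ast_context = nullptr; // cached wrapper
    };

    int main() {
      WrapperASTContext wrapper;
      Source source;
      source.InstallASTContext(wrapper);
      return 0;
    }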
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index 4966ac1640feb..fc25a2e72e3b1 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -1076,12 +1076,9 @@ void ClangExpressionDeclMap::LookupLocalVarNamespace( if (!frame_ast) return; - ClangASTContext *map_ast = ClangASTContext::GetASTContext(m_ast_context); - if (!map_ast) - return; - - clang::NamespaceDecl *namespace_decl = map_ast->GetUniqueNamespaceDeclaration( - g_lldb_local_vars_namespace_cstr, nullptr); + clang::NamespaceDecl *namespace_decl = + m_clang_ast_context->GetUniqueNamespaceDeclaration( + g_lldb_local_vars_namespace_cstr, nullptr); if (!namespace_decl) return; @@ -1180,6 +1177,104 @@ bool ClangExpressionDeclMap::LookupLocalVariable( return variable_found; } +/// Structure to hold the info needed when comparing function +/// declarations. +namespace { +struct FuncDeclInfo { + ConstString m_name; + CompilerType m_copied_type; + uint32_t m_decl_lvl; + SymbolContext m_sym_ctx; +}; +} // namespace + +SymbolContextList ClangExpressionDeclMap::SearchFunctionsInSymbolContexts( + const SymbolContextList &sc_list, + const CompilerDeclContext &frame_decl_context) { + // First, simplify things by looping through the symbol contexts to + // remove unwanted functions and separate out the functions we want to + // compare and prune into a separate list. Cache the info needed about + // the function declarations in a vector for efficiency. + uint32_t num_indices = sc_list.GetSize(); + SymbolContextList sc_sym_list; + std::vector<FuncDeclInfo> decl_infos; + decl_infos.reserve(num_indices); + clang::DeclContext *frame_decl_ctx = + (clang::DeclContext *)frame_decl_context.GetOpaqueDeclContext(); + ClangASTContext *ast = llvm::dyn_cast_or_null<ClangASTContext>( + frame_decl_context.GetTypeSystem()); + + for (uint32_t index = 0; index < num_indices; ++index) { + FuncDeclInfo fdi; + SymbolContext sym_ctx; + sc_list.GetContextAtIndex(index, sym_ctx); + + // We don't know enough about symbols to compare them, but we should + // keep them in the list. + Function *function = sym_ctx.function; + if (!function) { + sc_sym_list.Append(sym_ctx); + continue; + } + // Filter out functions without declaration contexts, as well as + // class/instance methods, since they'll be skipped in the code that + // follows anyway. + CompilerDeclContext func_decl_context = function->GetDeclContext(); + if (!func_decl_context || + func_decl_context.IsClassMethod(nullptr, nullptr, nullptr)) + continue; + // We can only prune functions for which we can copy the type. + CompilerType func_clang_type = function->GetType()->GetFullCompilerType(); + CompilerType copied_func_type = GuardedCopyType(func_clang_type); + if (!copied_func_type) { + sc_sym_list.Append(sym_ctx); + continue; + } + + fdi.m_sym_ctx = sym_ctx; + fdi.m_name = function->GetName(); + fdi.m_copied_type = copied_func_type; + fdi.m_decl_lvl = LLDB_INVALID_DECL_LEVEL; + if (fdi.m_copied_type && func_decl_context) { + // Call CountDeclLevels to get the number of parent scopes we have + // to look through before we find the function declaration. When + // comparing functions of the same type, the one with a lower count + // will be closer to us in the lookup scope and shadows the other.
+ clang::DeclContext *func_decl_ctx = + (clang::DeclContext *)func_decl_context.GetOpaqueDeclContext(); + fdi.m_decl_lvl = ast->CountDeclLevels(frame_decl_ctx, func_decl_ctx, + &fdi.m_name, &fdi.m_copied_type); + } + decl_infos.emplace_back(fdi); + } + + // Loop through the functions in our cache looking for matching types, + // then compare their scope levels to see which is closer. + std::multimap matches; + for (const FuncDeclInfo &fdi : decl_infos) { + const CompilerType t = fdi.m_copied_type; + auto q = matches.find(t); + if (q != matches.end()) { + if (q->second->m_decl_lvl > fdi.m_decl_lvl) + // This function is closer; remove the old set. + matches.erase(t); + else if (q->second->m_decl_lvl < fdi.m_decl_lvl) + // The functions in our set are closer - skip this one. + continue; + } + matches.insert(std::make_pair(t, &fdi)); + } + + // Loop through our matches and add their symbol contexts to our list. + SymbolContextList sc_func_list; + for (const auto &q : matches) + sc_func_list.Append(q.second->m_sym_ctx); + + // Rejoin the lists with the functions in front. + sc_func_list.Append(sc_sym_list); + return sc_func_list; +} + void ClangExpressionDeclMap::LookupFunction(NameSearchContext &context, lldb::ModuleSP module_sp, ConstString name, @@ -1237,98 +1332,7 @@ void ClangExpressionDeclMap::LookupFunction(NameSearchContext &context, // We can't do this without a compiler decl context for our frame. if (frame_decl_context) { - clang::DeclContext *frame_decl_ctx = - (clang::DeclContext *)frame_decl_context.GetOpaqueDeclContext(); - ClangASTContext *ast = llvm::dyn_cast_or_null( - frame_decl_context.GetTypeSystem()); - - // Structure to hold the info needed when comparing function - // declarations. - struct FuncDeclInfo { - ConstString m_name; - CompilerType m_copied_type; - uint32_t m_decl_lvl; - SymbolContext m_sym_ctx; - }; - - // First, symplify things by looping through the symbol contexts to - // remove unwanted functions and separate out the functions we want to - // compare and prune into a separate list. Cache the info needed about - // the function declarations in a vector for efficiency. - SymbolContextList sc_sym_list; - uint32_t num_indices = sc_list.GetSize(); - std::vector fdi_cache; - fdi_cache.reserve(num_indices); - for (uint32_t index = 0; index < num_indices; ++index) { - FuncDeclInfo fdi; - SymbolContext sym_ctx; - sc_list.GetContextAtIndex(index, sym_ctx); - - // We don't know enough about symbols to compare them, but we should - // keep them in the list. - Function *function = sym_ctx.function; - if (!function) { - sc_sym_list.Append(sym_ctx); - continue; - } - // Filter out functions without declaration contexts, as well as - // class/instance methods, since they'll be skipped in the code that - // follows anyway. - CompilerDeclContext func_decl_context = function->GetDeclContext(); - if (!func_decl_context || - func_decl_context.IsClassMethod(nullptr, nullptr, nullptr)) - continue; - // We can only prune functions for which we can copy the type. 
- CompilerType func_clang_type = - function->GetType()->GetFullCompilerType(); - CompilerType copied_func_type = GuardedCopyType(func_clang_type); - if (!copied_func_type) { - sc_sym_list.Append(sym_ctx); - continue; - } - - fdi.m_sym_ctx = sym_ctx; - fdi.m_name = function->GetName(); - fdi.m_copied_type = copied_func_type; - fdi.m_decl_lvl = LLDB_INVALID_DECL_LEVEL; - if (fdi.m_copied_type && func_decl_context) { - // Call CountDeclLevels to get the number of parent scopes we have - // to look through before we find the function declaration. When - // comparing functions of the same type, the one with a lower count - // will be closer to us in the lookup scope and shadows the other. - clang::DeclContext *func_decl_ctx = - (clang::DeclContext *)func_decl_context.GetOpaqueDeclContext(); - fdi.m_decl_lvl = ast->CountDeclLevels( - frame_decl_ctx, func_decl_ctx, &fdi.m_name, &fdi.m_copied_type); - } - fdi_cache.emplace_back(fdi); - } - - // Loop through the functions in our cache looking for matching types, - // then compare their scope levels to see which is closer. - std::multimap matches; - for (const FuncDeclInfo &fdi : fdi_cache) { - const CompilerType t = fdi.m_copied_type; - auto q = matches.find(t); - if (q != matches.end()) { - if (q->second->m_decl_lvl > fdi.m_decl_lvl) - // This function is closer; remove the old set. - matches.erase(t); - else if (q->second->m_decl_lvl < fdi.m_decl_lvl) - // The functions in our set are closer - skip this one. - continue; - } - matches.insert(std::make_pair(t, &fdi)); - } - - // Loop through our matches and add their symbol contexts to our list. - SymbolContextList sc_func_list; - for (const auto &q : matches) - sc_func_list.Append(q.second->m_sym_ctx); - - // Rejoin the lists with the functions in front. - sc_list = sc_func_list; - sc_list.Append(sc_sym_list); + sc_list = SearchFunctionsInSymbolContexts(sc_list, frame_decl_context); } } @@ -1724,8 +1728,7 @@ void ClangExpressionDeclMap::AddOneGenericVariable(NameSearchContext &context, TypeFromUser user_type(scratch_ast_context->GetBasicType(eBasicTypeVoid) .GetPointerType() .GetLValueReferenceType()); - ClangASTContext *own_context = ClangASTContext::GetASTContext(m_ast_context); - TypeFromParser parser_type(own_context->GetBasicType(eBasicTypeVoid) + TypeFromParser parser_type(m_clang_ast_context->GetBasicType(eBasicTypeVoid) .GetPointerType() .GetLValueReferenceType()); NamedDecl *var_decl = context.AddVarDecl(parser_type); @@ -1766,8 +1769,8 @@ void ClangExpressionDeclMap::AddOneRegister(NameSearchContext &context, Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS)); CompilerType clang_type = - ClangASTContext::GetBuiltinTypeForEncodingAndBitSize( - m_ast_context, reg_info->encoding, reg_info->byte_size * 8); + m_clang_ast_context->GetBuiltinTypeForEncodingAndBitSize( + reg_info->encoding, reg_info->byte_size * 8); if (!clang_type) { LLDB_LOGF(log, " Tried to add a type for %s, but couldn't get one", @@ -2003,9 +2006,8 @@ void ClangExpressionDeclMap::AddThisType(NameSearchContext &context, if (copied_clang_type.IsAggregateType() && copied_clang_type.GetCompleteType()) { - ClangASTContext *own_context = - ClangASTContext::GetASTContext(m_ast_context); - CompilerType void_clang_type = own_context->GetBasicType(eBasicTypeVoid); + CompilerType void_clang_type = + m_clang_ast_context->GetBasicType(eBasicTypeVoid); CompilerType void_ptr_clang_type = void_clang_type.GetPointerType(); CompilerType method_type = ClangASTContext::CreateFunctionType( @@ -2018,12 +2020,10 @@ void 
ClangExpressionDeclMap::AddThisType(NameSearchContext &context, const bool is_attr_used = true; const bool is_artificial = false; - CXXMethodDecl *method_decl = - ClangASTContext::GetASTContext(m_ast_context) - ->AddMethodToCXXRecordType( - copied_clang_type.GetOpaqueQualType(), "$__lldb_expr", nullptr, - method_type, lldb::eAccessPublic, is_virtual, is_static, - is_inline, is_explicit, is_attr_used, is_artificial); + CXXMethodDecl *method_decl = m_clang_ast_context->AddMethodToCXXRecordType( + copied_clang_type.GetOpaqueQualType(), "$__lldb_expr", nullptr, + method_type, lldb::eAccessPublic, is_virtual, is_static, is_inline, + is_explicit, is_attr_used, is_artificial); LLDB_LOG(log, " CEDM::AddThisType Added function $__lldb_expr " diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h index 1f308edf20cf2..5cd16d5d16874 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h @@ -458,6 +458,23 @@ class ClangExpressionDeclMap : public ClangASTSource { unsigned current_id, SymbolContext &sym_ctx, CompilerDeclContext &namespace_decl); + /// Searches for functions in the given SymbolContextList. + /// + /// \param[in] sc_list + /// The SymbolContextList to search. + /// + /// \param[in] frame_decl_context + /// The current DeclContext of the current frame. + /// + /// \return + /// A SymbolContextList with any found functions in the front and + /// any unknown SymbolContexts which are not functions in the back. + /// The SymbolContexts for the functions are ordered by how close they are + /// to the DeclContext for the given frame DeclContext. + SymbolContextList SearchFunctionsInSymbolContexts( + const SymbolContextList &sc_list, + const CompilerDeclContext &frame_decl_context); + /// Looks up a function. /// /// \param[in] context diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index a0f966ddd5111..15b242a8b87ee 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -997,7 +997,7 @@ ClangExpressionParser::ParseInternal(DiagnosticManager &diagnostic_manager, } else { ast_context.setExternalSource(ast_source); } - decl_map->InstallASTContext(ast_context, m_compiler->getFileManager()); + decl_map->InstallASTContext(*m_ast_context, m_compiler->getFileManager()); } // Check that the ASTReader is properly attached to ASTContext and Sema. diff --git a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp index 19a987b0f0042..ff142e6f35ff2 100644 --- a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp +++ b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp @@ -850,6 +850,7 @@ uint32_t EmulateInstructionARM::GetFramePointerRegisterNumber() const { /* On Apple iOS et al, the frame pointer register is always r7. * Typically on other ARM systems, thumb code uses r7; arm code uses r11. + * Windows on ARM, which is in thumb mode, uses r11 though. 
*/ uint32_t fp_regnum = 11; @@ -857,7 +858,7 @@ uint32_t EmulateInstructionARM::GetFramePointerRegisterNumber() const { if (is_apple) fp_regnum = 7; - if (m_opcode_mode == eModeThumb) + if (m_opcode_mode == eModeThumb && !m_arch.GetTriple().isOSWindows()) fp_regnum = 7; return fp_regnum; @@ -879,6 +880,7 @@ uint32_t EmulateInstructionARM::GetFramePointerDWARFRegisterNumber() const { /* On Apple iOS et al, the frame pointer register is always r7. * Typically on other ARM systems, thumb code uses r7; arm code uses r11. + * Windows on ARM, which is in thumb mode, uses r11 though. */ uint32_t fp_regnum = dwarf_r11; @@ -886,7 +888,7 @@ uint32_t EmulateInstructionARM::GetFramePointerDWARFRegisterNumber() const { if (is_apple) fp_regnum = dwarf_r7; - if (m_opcode_mode == eModeThumb) + if (m_opcode_mode == eModeThumb && !m_arch.GetTriple().isOSWindows()) fp_regnum = dwarf_r7; return fp_regnum; @@ -1343,6 +1345,8 @@ bool EmulateInstructionARM::EmulateMOVRdRm(const uint32_t opcode, EmulateInstruction::Context context; if (Rd == 13) context.type = EmulateInstruction::eContextAdjustStackPointer; + else if (Rd == GetFramePointerRegisterNumber() && Rm == 13) + context.type = EmulateInstruction::eContextSetFramePointer; else context.type = EmulateInstruction::eContextRegisterPlusOffset; RegisterInfo dwarf_reg; diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index c22f4ae9e41a9..4385a60f58623 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -284,46 +284,34 @@ class NodeAllocator { } }; -/// Given a mangled function `Mangled`, replace all the primitive function type -/// arguments of `Search` with type `Replace`. -class TypeSubstitutor - : public llvm::itanium_demangle::AbstractManglingParser +class ManglingSubstitutor + : public llvm::itanium_demangle::AbstractManglingParser { - /// Input character until which we have constructed the respective output - /// already - const char *Written; + using Base = + llvm::itanium_demangle::AbstractManglingParser; - llvm::StringRef Search; - llvm::StringRef Replace; - llvm::SmallString<128> Result; +public: + ManglingSubstitutor() : Base(nullptr, nullptr) {} - /// Whether we have performed any substitutions. - bool Substituted; + template + ConstString substitute(llvm::StringRef Mangled, Ts &&... 
Vals) { + this->getDerived().reset(Mangled, std::forward(Vals)...); + return substituteImpl(Mangled); + } - void reset(llvm::StringRef Mangled, llvm::StringRef Search, - llvm::StringRef Replace) { - AbstractManglingParser::reset(Mangled.begin(), Mangled.end()); + +protected: + void reset(llvm::StringRef Mangled) { + Base::reset(Mangled.begin(), Mangled.end()); Written = Mangled.begin(); - this->Search = Search; - this->Replace = Replace; Result.clear(); Substituted = false; } - void appendUnchangedInput() { - Result += llvm::StringRef(Written, First - Written); - Written = First; - } - -public: - TypeSubstitutor() : AbstractManglingParser(nullptr, nullptr) {} - - ConstString substitute(llvm::StringRef Mangled, llvm::StringRef From, - llvm::StringRef To) { + ConstString substituteImpl(llvm::StringRef Mangled) { Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_LANGUAGE); - - reset(Mangled, From, To); - if (parse() == nullptr) { + if (this->parse() == nullptr) { LLDB_LOG(log, "Failed to substitute mangling in {0}", Mangled); return ConstString(); } @@ -336,20 +324,69 @@ class TypeSubstitutor return ConstString(Result); } + void trySubstitute(llvm::StringRef From, llvm::StringRef To) { + if (!llvm::StringRef(currentParserPos(), this->numLeft()).startswith(From)) + return; + + // We found a match. Append unmodified input up to this point. + appendUnchangedInput(); + + // And then perform the replacement. + Result += To; + Written += From.size(); + Substituted = true; + } + +private: + /// Input character until which we have constructed the respective output + /// already. + const char *Written; + + llvm::SmallString<128> Result; + + /// Whether we have performed any substitutions. + bool Substituted; + + const char *currentParserPos() const { return this->First; } + + void appendUnchangedInput() { + Result += + llvm::StringRef(Written, std::distance(Written, currentParserPos())); + Written = currentParserPos(); + } + +}; + +/// Given a mangled function `Mangled`, replace all the primitive function type +/// arguments of `Search` with type `Replace`. +class TypeSubstitutor : public ManglingSubstitutor { + llvm::StringRef Search; + llvm::StringRef Replace; + +public: + void reset(llvm::StringRef Mangled, llvm::StringRef Search, + llvm::StringRef Replace) { + ManglingSubstitutor::reset(Mangled); + this->Search = Search; + this->Replace = Replace; + } + llvm::itanium_demangle::Node *parseType() { - if (llvm::StringRef(First, numLeft()).startswith(Search)) { - // We found a match. Append unmodified input up to this point. - appendUnchangedInput(); - - // And then perform the replacement. 
- Result += Replace; - Written += Search.size(); - Substituted = true; - } - return AbstractManglingParser::parseType(); + trySubstitute(Search, Replace); + return ManglingSubstitutor::parseType(); } }; -} + +class CtorDtorSubstitutor : public ManglingSubstitutor { +public: + llvm::itanium_demangle::Node * + parseCtorDtorName(llvm::itanium_demangle::Node *&SoFar, NameState *State) { + trySubstitute("C1", "C2"); + trySubstitute("D1", "D2"); + return ManglingSubstitutor::parseCtorDtorName(SoFar, State); + } +}; +} // namespace uint32_t CPlusPlusLanguage::FindAlternateFunctionManglings( const ConstString mangled_name, std::set &alternates) { @@ -397,6 +434,10 @@ uint32_t CPlusPlusLanguage::FindAlternateFunctionManglings( TS.substitute(mangled_name.GetStringRef(), "y", "m")) alternates.insert(ulong_fixup); + if (ConstString ctor_fixup = + CtorDtorSubstitutor().substitute(mangled_name.GetStringRef())) + alternates.insert(ctor_fixup); + return alternates.size() - start_size; } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp index 619c718a1c1b9..f6d8d4d9a7eb9 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp @@ -94,6 +94,8 @@ class MapIterator { MapIterator(ValueObject *entry, size_t depth = 0) : m_entry(entry), m_max_depth(depth), m_error(false) {} + MapIterator &operator=(const MapIterator &) = default; + ValueObjectSP value() { return m_entry.GetEntry(); } ValueObjectSP advance(size_t count) { diff --git a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp index b4d8ba2218a17..d556aae1c458c 100644 --- a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp +++ b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp @@ -62,7 +62,7 @@ bool CPPLanguageRuntime::GetObjectDescription( bool contains_lambda_identifier(llvm::StringRef &str_ref) { return str_ref.contains("$_") || str_ref.contains("'lambda'"); -}; +} CPPLanguageRuntime::LibCppStdFunctionCallableInfo line_entry_helper(Target &target, const SymbolContext &sc, Symbol *symbol, diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index 9bdbef393e39f..750b6ce6b0c6a 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -1625,19 +1625,13 @@ AppleObjCRuntimeV2::UpdateISAToDescriptorMapSharedCache() { // Substitute in the correct class_getName / class_getNameRaw function name, // concatenate the two parts of our expression text. The format string // has two %s's, so provide the name twice. 
- int prefix_string_size = snprintf (nullptr, 0, + std::string shared_class_expression; + llvm::raw_string_ostream(shared_class_expression) << llvm::format( g_shared_cache_class_name_funcptr, class_name_getter_function_name.AsCString(), class_name_getter_function_name.AsCString()); - char *class_name_func_ptr_expr = (char*) malloc (prefix_string_size + 1); - snprintf (class_name_func_ptr_expr, prefix_string_size + 1, - g_shared_cache_class_name_funcptr, - class_name_getter_function_name.AsCString(), - class_name_getter_function_name.AsCString()); - std::string shared_class_expression = class_name_func_ptr_expr; shared_class_expression += g_get_shared_cache_class_info_body; - free (class_name_func_ptr_expr); m_get_shared_cache_class_info_code.reset( GetTargetRef().GetUtilityFunctionForLanguage( diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index 3f8502548fc25..8eadaf1323d55 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1581,6 +1581,7 @@ static SectionType GetSectionTypeFromName(llvm::StringRef Name) { .Case("pubtypes", eSectionTypeDWARFDebugPubTypes) .Case("ranges", eSectionTypeDWARFDebugRanges) .Case("rnglists", eSectionTypeDWARFDebugRngLists) + .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo) .Case("str", eSectionTypeDWARFDebugStr) .Case("str.dwo", eSectionTypeDWARFDebugStrDwo) .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index aff1d1e87bb67..57c43de0c945d 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1140,6 +1140,7 @@ AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) { case eSectionTypeDWARFDebugPubTypes: case eSectionTypeDWARFDebugRanges: case eSectionTypeDWARFDebugRngLists: + case eSectionTypeDWARFDebugRngListsDwo: case eSectionTypeDWARFDebugStr: case eSectionTypeDWARFDebugStrDwo: case eSectionTypeDWARFDebugStrOffsets: diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp index 37e1120838f37..b0ce967a79665 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp @@ -114,9 +114,10 @@ const char *ObjectFilePECOFF::GetPluginDescriptionStatic() { ObjectFile *ObjectFilePECOFF::CreateInstance(const lldb::ModuleSP &module_sp, DataBufferSP &data_sp, lldb::offset_t data_offset, - const lldb_private::FileSpec *file, + const lldb_private::FileSpec *file_p, lldb::offset_t file_offset, lldb::offset_t length) { + FileSpec file = file_p ? 
*file_p : FileSpec(); if (!data_sp) { data_sp = MapFileData(file, length, file_offset); if (!data_sp) return nullptr; @@ -135,7 +136,7 @@ ObjectFile *ObjectFilePECOFF::CreateInstance(const lldb::ModuleSP &module_sp, } auto objfile_up = std::make_unique<ObjectFilePECOFF>( - module_sp, data_sp, data_offset, file, file_offset, length); + module_sp, data_sp, data_offset, file_p, file_offset, length); if (!objfile_up || !objfile_up->ParseHeader()) return nullptr; @@ -787,6 +788,77 @@ bool ObjectFilePECOFF::IsStripped() { return false; } +SectionType ObjectFilePECOFF::GetSectionType(llvm::StringRef sect_name, + const section_header_t &sect) { + ConstString const_sect_name(sect_name); + static ConstString g_code_sect_name(".code"); + static ConstString g_CODE_sect_name("CODE"); + static ConstString g_data_sect_name(".data"); + static ConstString g_DATA_sect_name("DATA"); + static ConstString g_bss_sect_name(".bss"); + static ConstString g_BSS_sect_name("BSS"); + + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_CODE && + ((const_sect_name == g_code_sect_name) || + (const_sect_name == g_CODE_sect_name))) { + return eSectionTypeCode; + } + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA && + ((const_sect_name == g_data_sect_name) || + (const_sect_name == g_DATA_sect_name))) { + if (sect.size == 0 && sect.offset == 0) + return eSectionTypeZeroFill; + else + return eSectionTypeData; + } + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA && + ((const_sect_name == g_bss_sect_name) || + (const_sect_name == g_BSS_sect_name))) { + if (sect.size == 0) + return eSectionTypeZeroFill; + else + return eSectionTypeData; + } + + SectionType section_type = + llvm::StringSwitch<SectionType>(sect_name) + .Case(".debug", eSectionTypeDebug) + .Case(".stabstr", eSectionTypeDataCString) + .Case(".reloc", eSectionTypeOther) + .Case(".debug_abbrev", eSectionTypeDWARFDebugAbbrev) + .Case(".debug_aranges", eSectionTypeDWARFDebugAranges) + .Case(".debug_frame", eSectionTypeDWARFDebugFrame) + .Case(".debug_info", eSectionTypeDWARFDebugInfo) + .Case(".debug_line", eSectionTypeDWARFDebugLine) + .Case(".debug_loc", eSectionTypeDWARFDebugLoc) + .Case(".debug_loclists", eSectionTypeDWARFDebugLocLists) + .Case(".debug_macinfo", eSectionTypeDWARFDebugMacInfo) + .Case(".debug_names", eSectionTypeDWARFDebugNames) + .Case(".debug_pubnames", eSectionTypeDWARFDebugPubNames) + .Case(".debug_pubtypes", eSectionTypeDWARFDebugPubTypes) + .Case(".debug_ranges", eSectionTypeDWARFDebugRanges) + .Case(".debug_str", eSectionTypeDWARFDebugStr) + .Case(".debug_types", eSectionTypeDWARFDebugTypes) + // .eh_frame can be truncated to 8 chars.
+ .Cases(".eh_frame", ".eh_fram", eSectionTypeEHFrame) + .Case(".gosymtab", eSectionTypeGoSymtab) + .Default(eSectionTypeInvalid); + if (section_type != eSectionTypeInvalid) + return section_type; + + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_CODE) + return eSectionTypeCode; + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) + return eSectionTypeData; + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) { + if (sect.size == 0) + return eSectionTypeZeroFill; + else + return eSectionTypeData; + } + return eSectionTypeOther; +} + void ObjectFilePECOFF::CreateSections(SectionList &unified_section_list) { if (m_sections_up) return; @@ -810,104 +882,9 @@ void ObjectFilePECOFF::CreateSections(SectionList &unified_section_list) { const uint32_t nsects = m_sect_headers.size(); ModuleSP module_sp(GetModule()); for (uint32_t idx = 0; idx < nsects; ++idx) { - ConstString const_sect_name(GetSectionName(m_sect_headers[idx])); - static ConstString g_code_sect_name(".code"); - static ConstString g_CODE_sect_name("CODE"); - static ConstString g_data_sect_name(".data"); - static ConstString g_DATA_sect_name("DATA"); - static ConstString g_bss_sect_name(".bss"); - static ConstString g_BSS_sect_name("BSS"); - static ConstString g_debug_sect_name(".debug"); - static ConstString g_reloc_sect_name(".reloc"); - static ConstString g_stab_sect_name(".stab"); - static ConstString g_stabstr_sect_name(".stabstr"); - static ConstString g_sect_name_dwarf_debug_abbrev(".debug_abbrev"); - static ConstString g_sect_name_dwarf_debug_aranges(".debug_aranges"); - static ConstString g_sect_name_dwarf_debug_frame(".debug_frame"); - static ConstString g_sect_name_dwarf_debug_info(".debug_info"); - static ConstString g_sect_name_dwarf_debug_line(".debug_line"); - static ConstString g_sect_name_dwarf_debug_loc(".debug_loc"); - static ConstString g_sect_name_dwarf_debug_loclists(".debug_loclists"); - static ConstString g_sect_name_dwarf_debug_macinfo(".debug_macinfo"); - static ConstString g_sect_name_dwarf_debug_names(".debug_names"); - static ConstString g_sect_name_dwarf_debug_pubnames(".debug_pubnames"); - static ConstString g_sect_name_dwarf_debug_pubtypes(".debug_pubtypes"); - static ConstString g_sect_name_dwarf_debug_ranges(".debug_ranges"); - static ConstString g_sect_name_dwarf_debug_str(".debug_str"); - static ConstString g_sect_name_dwarf_debug_types(".debug_types"); - static ConstString g_sect_name_eh_frame(".eh_frame"); - static ConstString g_sect_name_go_symtab(".gosymtab"); - SectionType section_type = eSectionTypeOther; - if (m_sect_headers[idx].flags & llvm::COFF::IMAGE_SCN_CNT_CODE && - ((const_sect_name == g_code_sect_name) || - (const_sect_name == g_CODE_sect_name))) { - section_type = eSectionTypeCode; - } else if (m_sect_headers[idx].flags & - llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA && - ((const_sect_name == g_data_sect_name) || - (const_sect_name == g_DATA_sect_name))) { - if (m_sect_headers[idx].size == 0 && m_sect_headers[idx].offset == 0) - section_type = eSectionTypeZeroFill; - else - section_type = eSectionTypeData; - } else if (m_sect_headers[idx].flags & - llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA && - ((const_sect_name == g_bss_sect_name) || - (const_sect_name == g_BSS_sect_name))) { - if (m_sect_headers[idx].size == 0) - section_type = eSectionTypeZeroFill; - else - section_type = eSectionTypeData; - } else if (const_sect_name == g_debug_sect_name) { - section_type = eSectionTypeDebug; - } else if (const_sect_name == g_stabstr_sect_name) { - section_type = 
eSectionTypeDataCString; - } else if (const_sect_name == g_reloc_sect_name) { - section_type = eSectionTypeOther; - } else if (const_sect_name == g_sect_name_dwarf_debug_abbrev) - section_type = eSectionTypeDWARFDebugAbbrev; - else if (const_sect_name == g_sect_name_dwarf_debug_aranges) - section_type = eSectionTypeDWARFDebugAranges; - else if (const_sect_name == g_sect_name_dwarf_debug_frame) - section_type = eSectionTypeDWARFDebugFrame; - else if (const_sect_name == g_sect_name_dwarf_debug_info) - section_type = eSectionTypeDWARFDebugInfo; - else if (const_sect_name == g_sect_name_dwarf_debug_line) - section_type = eSectionTypeDWARFDebugLine; - else if (const_sect_name == g_sect_name_dwarf_debug_loc) - section_type = eSectionTypeDWARFDebugLoc; - else if (const_sect_name == g_sect_name_dwarf_debug_loclists) - section_type = eSectionTypeDWARFDebugLocLists; - else if (const_sect_name == g_sect_name_dwarf_debug_macinfo) - section_type = eSectionTypeDWARFDebugMacInfo; - else if (const_sect_name == g_sect_name_dwarf_debug_names) - section_type = eSectionTypeDWARFDebugNames; - else if (const_sect_name == g_sect_name_dwarf_debug_pubnames) - section_type = eSectionTypeDWARFDebugPubNames; - else if (const_sect_name == g_sect_name_dwarf_debug_pubtypes) - section_type = eSectionTypeDWARFDebugPubTypes; - else if (const_sect_name == g_sect_name_dwarf_debug_ranges) - section_type = eSectionTypeDWARFDebugRanges; - else if (const_sect_name == g_sect_name_dwarf_debug_str) - section_type = eSectionTypeDWARFDebugStr; - else if (const_sect_name == g_sect_name_dwarf_debug_types) - section_type = eSectionTypeDWARFDebugTypes; - else if (const_sect_name == g_sect_name_eh_frame) - section_type = eSectionTypeEHFrame; - else if (const_sect_name == g_sect_name_go_symtab) - section_type = eSectionTypeGoSymtab; - else if (m_sect_headers[idx].flags & llvm::COFF::IMAGE_SCN_CNT_CODE) { - section_type = eSectionTypeCode; - } else if (m_sect_headers[idx].flags & - llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) { - section_type = eSectionTypeData; - } else if (m_sect_headers[idx].flags & - llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) { - if (m_sect_headers[idx].size == 0) - section_type = eSectionTypeZeroFill; - else - section_type = eSectionTypeData; - } + llvm::StringRef sect_name = GetSectionName(m_sect_headers[idx]); + ConstString const_sect_name(sect_name); + SectionType section_type = GetSectionType(sect_name, m_sect_headers[idx]); SectionSP section_sp(new Section( module_sp, // Module to which this section belongs diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h index 78088ecc43778..c0efe702f5700 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h @@ -283,6 +283,8 @@ class ObjectFilePECOFF : public lldb_private::ObjectFile { void DumpDependentModules(lldb_private::Stream *s); llvm::StringRef GetSectionName(const section_header_t &sect); + static lldb::SectionType GetSectionType(llvm::StringRef sect_name, + const section_header_t &sect); typedef std::vector<section_header_t> SectionHeaderColl; typedef SectionHeaderColl::iterator SectionHeaderCollIter; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index 6a3e6b4cadefc..ae9f20db43cc2 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -1106,7 +1106,7 @@ static FileSpec
GetXcodeSelectPath() { std::string command_output; Status status = Host::RunShellCommand("/usr/bin/xcode-select --print-path", - nullptr, // current working directory + FileSpec(), // current working directory &exit_status, &signo, &command_output, std::chrono::seconds(2), // short timeout false); // don't run in a shell diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp index 95ba81a2ab493..134a4c7c80759 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp @@ -180,11 +180,11 @@ ConstString PlatformMacOSX::GetSDKDirectory(lldb_private::Target &target) { std::string output; const char *command = "xcrun -sdk macosx --show-sdk-path"; lldb_private::Status error = RunShellCommand( - command, // shell command to run - nullptr, // current working directory - &status, // Put the exit status of the process in here - &signo, // Put the signal that caused the process to exit in - // here + command, // shell command to run + FileSpec(), // current working directory + &status, // Put the exit status of the process in here + &signo, // Put the signal that caused the process to exit in + // here &output, // Get the output from the command and place it in this // string std::chrono::seconds(3)); diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp index e9bb29293189d..0aa129c808d43 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp @@ -449,12 +449,10 @@ Status PlatformRemoteDarwinDevice::GetSymbolFile(const FileSpec &platform_file, Status error; char platform_file_path[PATH_MAX]; if (platform_file.GetPath(platform_file_path, sizeof(platform_file_path))) { - char resolved_path[PATH_MAX]; - const char *os_version_dir = GetDeviceSupportDirectoryForOSVersion(); if (os_version_dir) { - ::snprintf(resolved_path, sizeof(resolved_path), "%s/%s", os_version_dir, - platform_file_path); + std::string resolved_path = + (llvm::Twine(os_version_dir) + "/" + platform_file_path).str(); local_file.SetFile(resolved_path, FileSpec::Style::native); FileSystem::Instance().Resolve(local_file); @@ -466,31 +464,28 @@ Status PlatformRemoteDarwinDevice::GetSymbolFile(const FileSpec &platform_file, return error; } - ::snprintf(resolved_path, sizeof(resolved_path), "%s/Symbols.Internal/%s", - os_version_dir, platform_file_path); + resolved_path = (llvm::Twine(os_version_dir) + "/Symbols.Internal/" + + platform_file_path) + .str(); local_file.SetFile(resolved_path, FileSpec::Style::native); FileSystem::Instance().Resolve(local_file); if (FileSystem::Instance().Exists(local_file)) { - if (log) { - LLDB_LOGF( - log, - "Found a copy of %s in the DeviceSupport dir %s/Symbols.Internal", - platform_file_path, os_version_dir); - } + LLDB_LOGF( + log, + "Found a copy of %s in the DeviceSupport dir %s/Symbols.Internal", + platform_file_path, os_version_dir); return error; } - ::snprintf(resolved_path, sizeof(resolved_path), "%s/Symbols/%s", - os_version_dir, platform_file_path); + resolved_path = + (llvm::Twine(os_version_dir) + "/Symbols/" + platform_file_path) + .str(); local_file.SetFile(resolved_path, FileSpec::Style::native); FileSystem::Instance().Resolve(local_file); if (FileSystem::Instance().Exists(local_file)) { - if (log) { - LLDB_LOGF(log, - "Found a copy of %s in the DeviceSupport dir 
%s/Symbols", - platform_file_path, os_version_dir); - } + LLDB_LOGF(log, "Found a copy of %s in the DeviceSupport dir %s/Symbols", + platform_file_path, os_version_dir); return error; } } diff --git a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp index b12e21deb4592..f24856bc5b3f6 100644 --- a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp +++ b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp @@ -223,7 +223,7 @@ static uint32_t chown_file(Platform *platform, const char *path, command.Printf(":%d", gid); command.Printf("%s", path); int status; - platform->RunShellCommand(command.GetData(), nullptr, &status, nullptr, + platform->RunShellCommand(command.GetData(), FileSpec(), &status, nullptr, nullptr, std::chrono::seconds(10)); return status; } @@ -235,7 +235,7 @@ PlatformPOSIX::PutFile(const lldb_private::FileSpec &source, Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PLATFORM)); if (IsHost()) { - if (FileSpec::Equal(source, destination, true)) + if (source == destination) return Status(); // cp src dst // chown uid:gid dst @@ -248,7 +248,7 @@ PlatformPOSIX::PutFile(const lldb_private::FileSpec &source, StreamString command; command.Printf("cp %s %s", src_path.c_str(), dst_path.c_str()); int status; - RunShellCommand(command.GetData(), nullptr, &status, nullptr, nullptr, + RunShellCommand(command.GetData(), FileSpec(), &status, nullptr, nullptr, std::chrono::seconds(10)); if (status != 0) return Status("unable to perform copy"); @@ -278,7 +278,7 @@ PlatformPOSIX::PutFile(const lldb_private::FileSpec &source, GetHostname(), dst_path.c_str()); LLDB_LOGF(log, "[PutFile] Running command: %s\n", command.GetData()); int retcode; - Host::RunShellCommand(command.GetData(), nullptr, &retcode, nullptr, + Host::RunShellCommand(command.GetData(), FileSpec(), &retcode, nullptr, nullptr, std::chrono::minutes(1)); if (retcode == 0) { // Don't chown a local file for a remote system @@ -307,14 +307,14 @@ lldb_private::Status PlatformPOSIX::GetFile( if (dst_path.empty()) return Status("unable to get file path for destination"); if (IsHost()) { - if (FileSpec::Equal(source, destination, true)) + if (source == destination) return Status("local scenario->source and destination are the same file " "path: no operation performed"); // cp src dst StreamString cp_command; cp_command.Printf("cp %s %s", src_path.c_str(), dst_path.c_str()); int status; - RunShellCommand(cp_command.GetData(), nullptr, &status, nullptr, nullptr, + RunShellCommand(cp_command.GetData(), FileSpec(), &status, nullptr, nullptr, std::chrono::seconds(10)); if (status != 0) return Status("unable to perform copy"); @@ -335,7 +335,7 @@ lldb_private::Status PlatformPOSIX::GetFile( dst_path.c_str()); LLDB_LOGF(log, "[GetFile] Running command: %s\n", command.GetData()); int retcode; - Host::RunShellCommand(command.GetData(), nullptr, &retcode, nullptr, + Host::RunShellCommand(command.GetData(), FileSpec(), &retcode, nullptr, nullptr, std::chrono::minutes(1)); if (retcode == 0) return Status(); diff --git a/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp b/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp index 28e7a590ff9f3..c3cb45530f2ad 100644 --- a/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp +++ b/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp @@ -154,15 +154,8 @@ bool RegisterContextWindows::CacheAllRegisterValues() { return true; TargetThreadWindows &wthread = static_cast(m_thread); - 
uint8_t buffer[2048]; - memset(buffer, 0, sizeof(buffer)); - PCONTEXT tmpContext = NULL; - DWORD contextLength = (DWORD)sizeof(buffer); - if (!::InitializeContext(buffer, kWinContextFlags, &tmpContext, - &contextLength)) { - return false; - } - memcpy(&m_context, tmpContext, sizeof(m_context)); + memset(&m_context, 0, sizeof(m_context)); + m_context.ContextFlags = kWinContextFlags; if (::SuspendThread( wthread.GetHostThread().GetNativeThread().GetSystemHandle()) == (DWORD)-1) { diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 5d1dd79c2ffa7..dfef06aa6eafb 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -388,36 +388,6 @@ bool ProcessGDBRemote::ParsePythonTargetDefinition( return false; } -// If the remote stub didn't give us eh_frame or DWARF register numbers for a -// register, see if the ABI can provide them. -// DWARF and eh_frame register numbers are defined as a part of the ABI. -static void AugmentRegisterInfoViaABI(RegisterInfo &reg_info, - ConstString reg_name, ABISP abi_sp) { - if (reg_info.kinds[eRegisterKindEHFrame] == LLDB_INVALID_REGNUM || - reg_info.kinds[eRegisterKindDWARF] == LLDB_INVALID_REGNUM) { - if (abi_sp) { - RegisterInfo abi_reg_info; - if (abi_sp->GetRegisterInfoByName(reg_name, abi_reg_info)) { - if (reg_info.kinds[eRegisterKindEHFrame] == LLDB_INVALID_REGNUM && - abi_reg_info.kinds[eRegisterKindEHFrame] != LLDB_INVALID_REGNUM) { - reg_info.kinds[eRegisterKindEHFrame] = - abi_reg_info.kinds[eRegisterKindEHFrame]; - } - if (reg_info.kinds[eRegisterKindDWARF] == LLDB_INVALID_REGNUM && - abi_reg_info.kinds[eRegisterKindDWARF] != LLDB_INVALID_REGNUM) { - reg_info.kinds[eRegisterKindDWARF] = - abi_reg_info.kinds[eRegisterKindDWARF]; - } - if (reg_info.kinds[eRegisterKindGeneric] == LLDB_INVALID_REGNUM && - abi_reg_info.kinds[eRegisterKindGeneric] != LLDB_INVALID_REGNUM) { - reg_info.kinds[eRegisterKindGeneric] = - abi_reg_info.kinds[eRegisterKindGeneric]; - } - } - } - } -} - static size_t SplitCommaSeparatedRegisterNumberString( const llvm::StringRef &comma_separated_regiter_numbers, std::vector<uint32_t> &regnums, int base) { @@ -615,12 +585,12 @@ void ProcessGDBRemote::BuildDynamicRegisterInfo(bool force) { reg_info.invalidate_regs = invalidate_regs.data(); } + reg_info.name = reg_name.AsCString(); // We have to make a temporary ABI here, and not use the GetABI because // this code gets called in DidAttach, when the target architecture // (and consequently the ABI we'll get from the process) may be wrong.
- ABISP abi_to_use = ABI::FindPlugin(shared_from_this(), arch_to_use); - - AugmentRegisterInfoViaABI(reg_info, reg_name, abi_to_use); + if (ABISP abi_sp = ABI::FindPlugin(shared_from_this(), arch_to_use)) + abi_sp->AugmentRegisterInfo(reg_info); m_register_info.AddRegister(reg_info, reg_name, alt_name, set_name); } else { @@ -4483,7 +4453,9 @@ bool ParseRegisters(XMLNode feature_node, GdbServerTargetInfo &target_info, } ++cur_reg_num; - AugmentRegisterInfoViaABI(reg_info, reg_name, abi_sp); + reg_info.name = reg_name.AsCString(); + if (abi_sp) + abi_sp->AugmentRegisterInfo(reg_info); dyn_reg_info.AddRegister(reg_info, reg_name, alt_name, set_name); return true; // Keep iterating through all "reg" elements diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 5ed01cf479344..f6b918399cdc7 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -77,11 +77,23 @@ extern "C" void init_lldb(void); #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wreturn-type-c-linkage" +// Disable warning C4190: 'LLDBSwigPythonBreakpointCallbackFunction' has +// C-linkage specified, but returns UDT 'llvm::Expected' which is +// incompatible with C +#if _MSC_VER +#pragma warning (push) +#pragma warning (disable : 4190) +#endif + extern "C" llvm::Expected LLDBSwigPythonBreakpointCallbackFunction( const char *python_function_name, const char *session_dictionary_name, const lldb::StackFrameSP &sb_frame, const lldb::BreakpointLocationSP &sb_bp_loc, StructuredDataImpl *args_impl); +#if _MSC_VER +#pragma warning (pop) +#endif + #pragma clang diagnostic pop extern "C" bool LLDBSwigPythonWatchpointCallbackFunction( diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index 29d2e8a0c6a84..b2c4d08833414 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -731,7 +731,7 @@ void SymbolFileBreakpad::ParseLineTableAndSupportFiles(CompileUnit &cu, } if (next_addr) finish_sequence(); - data.support_files = map.translate(cu, *m_files); + data.support_files = map.translate(cu.GetPrimaryFile(), *m_files); } void SymbolFileBreakpad::ParseUnwindData() { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 01655f04c4223..09f5b28449cb1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -463,13 +463,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, const dw_tag_t tag = die.Tag(); - Type::ResolveState resolve_state = Type::ResolveState::Unresolved; - - Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID; - CompilerType clang_type; - TypeSP type_sp; - LanguageType cu_language = die.GetLanguage(); + switch (tag) { case DW_TAG_typedef: case DW_TAG_base_type: @@ -480,844 +475,888 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, case DW_TAG_restrict_type: case DW_TAG_volatile_type: case DW_TAG_unspecified_type: { - if (tag == DW_TAG_typedef && attrs.type.IsValid()) { - // Try to parse a typedef from the (DWARF embedded in the) Clang - // module file first as modules can contain 
typedef'ed - // structures that have no names like: - // - // typedef struct { int a; } Foo; - // - // In this case we will have a structure with no name and a - // typedef named "Foo" that points to this unnamed - // structure. The name in the typedef is the only identifier for - // the struct, so always try to get typedefs from Clang modules - // if possible. - // - // The type_sp returned will be empty if the typedef doesn't - // exist in a module file, so it is cheap to call this function - // just to check. - // - // If we don't do this we end up creating a TypeSP that says - // this is a typedef to type 0x123 (the DW_AT_type value would - // be 0x123 in the DW_TAG_typedef), and this is the unnamed - // structure type. We will have a hard time tracking down an - // unnammed structure type in the module debug info, so we make - // sure we don't get into this situation by always resolving - // typedefs from the module. - const DWARFDIE encoding_die = attrs.type.Reference(); - - // First make sure that the die that this is typedef'ed to _is_ - // just a declaration (DW_AT_declaration == 1), not a full - // definition since template types can't be represented in - // modules since only concrete instances of templates are ever - // emitted and modules won't contain those - if (encoding_die && - encoding_die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0) == 1) { - type_sp = ParseTypeFromClangModule(sc, die, log); - if (type_sp) - return type_sp; - } - } + type_sp = ParseTypeModifier(sc, die, attrs); + break; + } - DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\") type => 0x%8.8lx\n", - die.GetID(), DW_TAG_value_to_name(tag), type_name_cstr, - encoding_uid.Reference()); + case DW_TAG_structure_type: + case DW_TAG_union_type: + case DW_TAG_class_type: { + type_sp = ParseStructureLikeDIE(sc, die, attrs); + break; + } - switch (tag) { - default: - break; + case DW_TAG_enumeration_type: { + type_sp = ParseEnum(sc, die, attrs); + break; + } - case DW_TAG_unspecified_type: - if (attrs.name == "nullptr_t" || attrs.name == "decltype(nullptr)") { - resolve_state = Type::ResolveState::Full; - clang_type = m_ast.GetBasicType(eBasicTypeNullPtr); - break; - } - // Fall through to base type below in case we can handle the type - // there... - LLVM_FALLTHROUGH; + case DW_TAG_inlined_subroutine: + case DW_TAG_subprogram: + case DW_TAG_subroutine_type: { + type_sp = ParseSubroutine(die, attrs); + break; + } + case DW_TAG_array_type: { + type_sp = ParseArrayType(die, attrs); + break; + } + case DW_TAG_ptr_to_member_type: { + type_sp = ParsePointerToMemberType(die, attrs); + break; + } + default: + dwarf->GetObjectFile()->GetModule()->ReportError( + "{0x%8.8x}: unhandled type tag 0x%4.4x (%s), please file a bug and " + "attach the file at the start of this error message", + die.GetOffset(), tag, DW_TAG_value_to_name(tag)); + break; + } - case DW_TAG_base_type: - resolve_state = Type::ResolveState::Full; - clang_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( - attrs.name.GetCString(), attrs.encoding, - attrs.byte_size.getValueOr(0) * 8); - break; + // TODO: We should consider making the switch above exhaustive to simplify + // control flow in ParseTypeFromDWARF. Then, we could simply replace this + // return statement with a call to llvm_unreachable. 
+ return UpdateSymbolContextScopeForType(sc, die, type_sp); +} - case DW_TAG_pointer_type: - encoding_data_type = Type::eEncodingIsPointerUID; - break; - case DW_TAG_reference_type: - encoding_data_type = Type::eEncodingIsLValueReferenceUID; - break; - case DW_TAG_rvalue_reference_type: - encoding_data_type = Type::eEncodingIsRValueReferenceUID; - break; - case DW_TAG_typedef: - encoding_data_type = Type::eEncodingIsTypedefUID; - break; - case DW_TAG_const_type: - encoding_data_type = Type::eEncodingIsConstUID; - break; - case DW_TAG_restrict_type: - encoding_data_type = Type::eEncodingIsRestrictUID; - break; - case DW_TAG_volatile_type: - encoding_data_type = Type::eEncodingIsVolatileUID; +lldb::TypeSP +DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs) { + Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | + DWARF_LOG_LOOKUPS)); + SymbolFileDWARF *dwarf = die.GetDWARF(); + const dw_tag_t tag = die.Tag(); + LanguageType cu_language = die.GetLanguage(); + Type::ResolveState resolve_state = Type::ResolveState::Unresolved; + Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID; + TypeSP type_sp; + CompilerType clang_type; + + if (tag == DW_TAG_typedef && attrs.type.IsValid()) { + // Try to parse a typedef from the (DWARF embedded in the) Clang + // module file first as modules can contain typedef'ed + // structures that have no names like: + // + // typedef struct { int a; } Foo; + // + // In this case we will have a structure with no name and a + // typedef named "Foo" that points to this unnamed + // structure. The name in the typedef is the only identifier for + // the struct, so always try to get typedefs from Clang modules + // if possible. + // + // The type_sp returned will be empty if the typedef doesn't + // exist in a module file, so it is cheap to call this function + // just to check. + // + // If we don't do this we end up creating a TypeSP that says + // this is a typedef to type 0x123 (the DW_AT_type value would + // be 0x123 in the DW_TAG_typedef), and this is the unnamed + // structure type. We will have a hard time tracking down an + // unnammed structure type in the module debug info, so we make + // sure we don't get into this situation by always resolving + // typedefs from the module. + const DWARFDIE encoding_die = attrs.type.Reference(); + + // First make sure that the die that this is typedef'ed to _is_ + // just a declaration (DW_AT_declaration == 1), not a full + // definition since template types can't be represented in + // modules since only concrete instances of templates are ever + // emitted and modules won't contain those + if (encoding_die && + encoding_die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0) == 1) { + type_sp = ParseTypeFromClangModule(sc, die, log); + if (type_sp) + return type_sp; + } + } + + DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\") type => 0x%8.8lx\n", die.GetID(), + DW_TAG_value_to_name(tag), type_name_cstr, + encoding_uid.Reference()); + + switch (tag) { + default: + break; + + case DW_TAG_unspecified_type: + if (attrs.name == "nullptr_t" || attrs.name == "decltype(nullptr)") { + resolve_state = Type::ResolveState::Full; + clang_type = m_ast.GetBasicType(eBasicTypeNullPtr); break; } + // Fall through to base type below in case we can handle the type + // there... 
+ LLVM_FALLTHROUGH; - if (!clang_type && (encoding_data_type == Type::eEncodingIsPointerUID || - encoding_data_type == Type::eEncodingIsTypedefUID)) { - if (tag == DW_TAG_pointer_type) { - DWARFDIE target_die = die.GetReferencedDIE(DW_AT_type); - - if (target_die.GetAttributeValueAsUnsigned(DW_AT_APPLE_block, 0)) { - // Blocks have a __FuncPtr inside them which is a pointer to a - // function of the proper type. - - for (DWARFDIE child_die = target_die.GetFirstChild(); - child_die.IsValid(); child_die = child_die.GetSibling()) { - if (!strcmp(child_die.GetAttributeValueAsString(DW_AT_name, ""), - "__FuncPtr")) { - DWARFDIE function_pointer_type = - child_die.GetReferencedDIE(DW_AT_type); - - if (function_pointer_type) { - DWARFDIE function_type = - function_pointer_type.GetReferencedDIE(DW_AT_type); - - bool function_type_is_new_pointer; - TypeSP lldb_function_type_sp = ParseTypeFromDWARF( - sc, function_type, &function_type_is_new_pointer); - - if (lldb_function_type_sp) { - clang_type = m_ast.CreateBlockPointerType( - lldb_function_type_sp->GetForwardCompilerType()); - encoding_data_type = Type::eEncodingIsUID; - attrs.type.Clear(); - resolve_state = Type::ResolveState::Full; - } - } + case DW_TAG_base_type: + resolve_state = Type::ResolveState::Full; + clang_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( + attrs.name.GetCString(), attrs.encoding, + attrs.byte_size.getValueOr(0) * 8); + break; - break; + case DW_TAG_pointer_type: + encoding_data_type = Type::eEncodingIsPointerUID; + break; + case DW_TAG_reference_type: + encoding_data_type = Type::eEncodingIsLValueReferenceUID; + break; + case DW_TAG_rvalue_reference_type: + encoding_data_type = Type::eEncodingIsRValueReferenceUID; + break; + case DW_TAG_typedef: + encoding_data_type = Type::eEncodingIsTypedefUID; + break; + case DW_TAG_const_type: + encoding_data_type = Type::eEncodingIsConstUID; + break; + case DW_TAG_restrict_type: + encoding_data_type = Type::eEncodingIsRestrictUID; + break; + case DW_TAG_volatile_type: + encoding_data_type = Type::eEncodingIsVolatileUID; + break; + } + + if (!clang_type && (encoding_data_type == Type::eEncodingIsPointerUID || + encoding_data_type == Type::eEncodingIsTypedefUID)) { + if (tag == DW_TAG_pointer_type) { + DWARFDIE target_die = die.GetReferencedDIE(DW_AT_type); + + if (target_die.GetAttributeValueAsUnsigned(DW_AT_APPLE_block, 0)) { + // Blocks have a __FuncPtr inside them which is a pointer to a + // function of the proper type. 
+ + for (DWARFDIE child_die = target_die.GetFirstChild(); + child_die.IsValid(); child_die = child_die.GetSibling()) { + if (!strcmp(child_die.GetAttributeValueAsString(DW_AT_name, ""), + "__FuncPtr")) { + DWARFDIE function_pointer_type = + child_die.GetReferencedDIE(DW_AT_type); + + if (function_pointer_type) { + DWARFDIE function_type = + function_pointer_type.GetReferencedDIE(DW_AT_type); + + bool function_type_is_new_pointer; + TypeSP lldb_function_type_sp = ParseTypeFromDWARF( + sc, function_type, &function_type_is_new_pointer); + + if (lldb_function_type_sp) { + clang_type = m_ast.CreateBlockPointerType( + lldb_function_type_sp->GetForwardCompilerType()); + encoding_data_type = Type::eEncodingIsUID; + attrs.type.Clear(); + resolve_state = Type::ResolveState::Full; + } } + + break; } } } + } - if (cu_language == eLanguageTypeObjC || - cu_language == eLanguageTypeObjC_plus_plus) { - if (attrs.name) { - static ConstString g_objc_type_name_id("id"); - static ConstString g_objc_type_name_Class("Class"); - static ConstString g_objc_type_name_selector("SEL"); + if (cu_language == eLanguageTypeObjC || + cu_language == eLanguageTypeObjC_plus_plus) { + if (attrs.name) { + if (attrs.name == "id") { + if (log) + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " + "is Objective-C 'id' built-in type.", + die.GetOffset(), die.GetTagAsCString(), die.GetName()); + clang_type = m_ast.GetBasicType(eBasicTypeObjCID); + encoding_data_type = Type::eEncodingIsUID; + attrs.type.Clear(); + resolve_state = Type::ResolveState::Full; + } else if (attrs.name == "Class") { + if (log) + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " + "is Objective-C 'Class' built-in type.", + die.GetOffset(), die.GetTagAsCString(), die.GetName()); + clang_type = m_ast.GetBasicType(eBasicTypeObjCClass); + encoding_data_type = Type::eEncodingIsUID; + attrs.type.Clear(); + resolve_state = Type::ResolveState::Full; + } else if (attrs.name == "SEL") { + if (log) + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " + "is Objective-C 'selector' built-in type.", + die.GetOffset(), die.GetTagAsCString(), die.GetName()); + clang_type = m_ast.GetBasicType(eBasicTypeObjCSel); + encoding_data_type = Type::eEncodingIsUID; + attrs.type.Clear(); + resolve_state = Type::ResolveState::Full; + } + } else if (encoding_data_type == Type::eEncodingIsPointerUID && + attrs.type.IsValid()) { + // Clang sometimes erroneously emits id as objc_object*. In that + // case we fix up the type to "id". 
- if (attrs.name == g_objc_type_name_id) { - if (log) - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " - "is Objective-C 'id' built-in type.", - die.GetOffset(), die.GetTagAsCString(), die.GetName()); - clang_type = m_ast.GetBasicType(eBasicTypeObjCID); - encoding_data_type = Type::eEncodingIsUID; - attrs.type.Clear(); - resolve_state = Type::ResolveState::Full; + const DWARFDIE encoding_die = attrs.type.Reference(); - } else if (attrs.name == g_objc_type_name_Class) { + if (encoding_die && encoding_die.Tag() == DW_TAG_structure_type) { + llvm::StringRef struct_name = encoding_die.GetName(); + if (struct_name == "objc_object") { if (log) dwarf->GetObjectFile()->GetModule()->LogMessage( log, - "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " - "is Objective-C 'Class' built-in type.", + "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s " + "'%s' is 'objc_object*', which we overrode to " + "'id'.", die.GetOffset(), die.GetTagAsCString(), die.GetName()); - clang_type = m_ast.GetBasicType(eBasicTypeObjCClass); - encoding_data_type = Type::eEncodingIsUID; - attrs.type.Clear(); - resolve_state = Type::ResolveState::Full; - } else if (attrs.name == g_objc_type_name_selector) { - if (log) - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " - "is Objective-C 'selector' built-in type.", - die.GetOffset(), die.GetTagAsCString(), die.GetName()); - clang_type = m_ast.GetBasicType(eBasicTypeObjCSel); + clang_type = m_ast.GetBasicType(eBasicTypeObjCID); encoding_data_type = Type::eEncodingIsUID; attrs.type.Clear(); resolve_state = Type::ResolveState::Full; } - } else if (encoding_data_type == Type::eEncodingIsPointerUID && - attrs.type.IsValid()) { - // Clang sometimes erroneously emits id as objc_object*. In that - // case we fix up the type to "id". 
- - const DWARFDIE encoding_die = attrs.type.Reference(); - - if (encoding_die && encoding_die.Tag() == DW_TAG_structure_type) { - if (const char *struct_name = encoding_die.GetName()) { - if (!strcmp(struct_name, "objc_object")) { - if (log) - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s " - "'%s' is 'objc_object*', which we overrode to " - "'id'.", - die.GetOffset(), die.GetTagAsCString(), die.GetName()); - clang_type = m_ast.GetBasicType(eBasicTypeObjCID); - encoding_data_type = Type::eEncodingIsUID; - attrs.type.Clear(); - resolve_state = Type::ResolveState::Full; - } - } - } } } } - - type_sp = std::make_shared( - die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr, - dwarf->GetUID(attrs.type.Reference()), encoding_data_type, &attrs.decl, - clang_type, resolve_state); - - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - } break; - - case DW_TAG_structure_type: - case DW_TAG_union_type: - case DW_TAG_class_type: { - assert((!type_sp && !clang_type) && - "Did not expect partially computed structure-like type"); - TypeSP struct_like_type_sp = ParseStructureLikeDIE(sc, die, attrs); - return UpdateSymbolContextScopeForType(sc, die, struct_like_type_sp); } - case DW_TAG_enumeration_type: { - if (attrs.is_forward_declaration) { - type_sp = ParseTypeFromClangModule(sc, die, log); - if (type_sp) - return type_sp; + type_sp = std::make_shared( + die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr, + dwarf->GetUID(attrs.type.Reference()), encoding_data_type, &attrs.decl, + clang_type, resolve_state); - DWARFDeclContext die_decl_ctx; - die.GetDWARFDeclContext(die_decl_ctx); + dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + return type_sp; +} - type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx); +TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs) { + Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | + DWARF_LOG_LOOKUPS)); + SymbolFileDWARF *dwarf = die.GetDWARF(); + const dw_tag_t tag = die.Tag(); + TypeSP type_sp; - if (!type_sp) { - SymbolFileDWARFDebugMap *debug_map_symfile = - dwarf->GetDebugMapSymfile(); - if (debug_map_symfile) { - // We weren't able to find a full declaration in this DWARF, - // see if we have a declaration anywhere else... 
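The forward-declaration handling spelled out above and continued below is a two-level lookup: try the current module's DWARF first, then fall back to the debug-map symbol file, which can see the DWARF inside the individual .o files. A sketch of the pattern under that assumption (the free function is mine, the two calls are the patch's own):

// Hypothetical free-standing form of the two-level definition lookup.
static TypeSP FindEnumDefinition(SymbolFileDWARF &dwarf,
                                 const DWARFDeclContext &die_decl_ctx) {
  // First: a full definition somewhere in this module's DWARF?
  if (TypeSP type_sp =
          dwarf.FindDefinitionTypeForDWARFDeclContext(die_decl_ctx))
    return type_sp;
  // Second: ask the debug map (the .o files) if one exists.
  if (SymbolFileDWARFDebugMap *debug_map = dwarf.GetDebugMapSymfile())
    return debug_map->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx);
  return TypeSP(); // nothing found; the caller keeps the forward decl
}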
- type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext( - die_decl_ctx); - } - } + if (attrs.is_forward_declaration) { + type_sp = ParseTypeFromClangModule(sc, die, log); + if (type_sp) + return type_sp; - if (type_sp) { - if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a " - "forward declaration, complete type is 0x%8.8" PRIx64, - static_cast(this), die.GetOffset(), - DW_TAG_value_to_name(tag), attrs.name.GetCString(), - type_sp->GetID()); - } + DWARFDeclContext die_decl_ctx; + die.GetDWARFDeclContext(die_decl_ctx); - // We found a real definition for this type elsewhere so lets use - // it and cache the fact that we found a complete type for this - // die - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - clang::DeclContext *defn_decl_ctx = - GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID())); - if (defn_decl_ctx) - LinkDeclContextToDIE(defn_decl_ctx, die); - return type_sp; + type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx); + + if (!type_sp) { + SymbolFileDWARFDebugMap *debug_map_symfile = dwarf->GetDebugMapSymfile(); + if (debug_map_symfile) { + // We weren't able to find a full declaration in this DWARF, + // see if we have a declaration anywhere else... + type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext( + die_decl_ctx); } } - DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), - DW_TAG_value_to_name(tag), type_name_cstr); - - CompilerType enumerator_clang_type; - clang_type.SetCompilerType( - &m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE())); - if (!clang_type) { - if (attrs.type.IsValid()) { - Type *enumerator_type = - dwarf->ResolveTypeUID(attrs.type.Reference(), true); - if (enumerator_type) - enumerator_clang_type = enumerator_type->GetFullCompilerType(); - } - if (!enumerator_clang_type) { - if (attrs.byte_size) { - enumerator_clang_type = - m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( - NULL, DW_ATE_signed, *attrs.byte_size * 8); - } else { - enumerator_clang_type = m_ast.GetBasicType(eBasicTypeInt); - } + if (type_sp) { + if (log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a " + "forward declaration, complete type is 0x%8.8" PRIx64, + static_cast(this), die.GetOffset(), + DW_TAG_value_to_name(tag), attrs.name.GetCString(), + type_sp->GetID()); } - clang_type = m_ast.CreateEnumerationType( - attrs.name.GetCString(), - GetClangDeclContextContainingDIE(die, nullptr), attrs.decl, - enumerator_clang_type, attrs.is_scoped_enum); - } else { - enumerator_clang_type = - m_ast.GetEnumerationIntegerType(clang_type.GetOpaqueQualType()); + // We found a real definition for this type elsewhere so lets use + // it and cache the fact that we found a complete type for this + // die + dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + clang::DeclContext *defn_decl_ctx = + GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID())); + if (defn_decl_ctx) + LinkDeclContextToDIE(defn_decl_ctx, die); + return type_sp; } + } + DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), + DW_TAG_value_to_name(tag), type_name_cstr); - LinkDeclContextToDIE(ClangASTContext::GetDeclContextForType(clang_type), - die); - - type_sp = std::make_shared( - die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr, - dwarf->GetUID(attrs.type.Reference()), Type::eEncodingIsUID, - &attrs.decl, clang_type, Type::ResolveState::Forward); + CompilerType 
enumerator_clang_type; + CompilerType clang_type; + clang_type.SetCompilerType( + &m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE())); + if (!clang_type) { + if (attrs.type.IsValid()) { + Type *enumerator_type = + dwarf->ResolveTypeUID(attrs.type.Reference(), true); + if (enumerator_type) + enumerator_clang_type = enumerator_type->GetFullCompilerType(); + } - if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) { - if (die.HasChildren()) { - bool is_signed = false; - enumerator_clang_type.IsIntegerType(is_signed); - ParseChildEnumerators(clang_type, is_signed, - type_sp->GetByteSize().getValueOr(0), die); + if (!enumerator_clang_type) { + if (attrs.byte_size) { + enumerator_clang_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( + NULL, DW_ATE_signed, *attrs.byte_size * 8); + } else { + enumerator_clang_type = m_ast.GetBasicType(eBasicTypeInt); } - ClangASTContext::CompleteTagDeclarationDefinition(clang_type); - } else { - dwarf->GetObjectFile()->GetModule()->ReportError( - "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its " - "definition.\nPlease file a bug and attach the file at the " - "start of this error message", - die.GetOffset(), attrs.name.GetCString()); } - } break; - case DW_TAG_inlined_subroutine: - case DW_TAG_subprogram: - case DW_TAG_subroutine_type: { - bool is_variadic = false; - bool is_static = false; - bool has_template_params = false; + clang_type = m_ast.CreateEnumerationType( + attrs.name.GetCString(), GetClangDeclContextContainingDIE(die, nullptr), + attrs.decl, enumerator_clang_type, attrs.is_scoped_enum); + } else { + enumerator_clang_type = + m_ast.GetEnumerationIntegerType(clang_type.GetOpaqueQualType()); + } - unsigned type_quals = 0; + LinkDeclContextToDIE(ClangASTContext::GetDeclContextForType(clang_type), die); - std::string object_pointer_name; - if (attrs.object_pointer) { - const char *object_pointer_name_cstr = attrs.object_pointer.GetName(); - if (object_pointer_name_cstr) - object_pointer_name = object_pointer_name_cstr; - } + type_sp = std::make_shared( + die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr, + dwarf->GetUID(attrs.type.Reference()), Type::eEncodingIsUID, &attrs.decl, + clang_type, Type::ResolveState::Forward); - DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), - DW_TAG_value_to_name(tag), type_name_cstr); + if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) { + if (die.HasChildren()) { + bool is_signed = false; + enumerator_clang_type.IsIntegerType(is_signed); + ParseChildEnumerators(clang_type, is_signed, + type_sp->GetByteSize().getValueOr(0), die); + } + ClangASTContext::CompleteTagDeclarationDefinition(clang_type); + } else { + dwarf->GetObjectFile()->GetModule()->ReportError( + "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its " + "definition.\nPlease file a bug and attach the file at the " + "start of this error message", + die.GetOffset(), attrs.name.GetCString()); + } + return type_sp; +} - CompilerType return_clang_type; - Type *func_type = NULL; +TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs) { + Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | + DWARF_LOG_LOOKUPS)); - if (attrs.type.IsValid()) - func_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); + SymbolFileDWARF *dwarf = die.GetDWARF(); + const dw_tag_t tag = die.Tag(); - if (func_type) - return_clang_type = func_type->GetForwardCompilerType(); - else - return_clang_type = m_ast.GetBasicType(eBasicTypeVoid); 
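Both of the fallbacks above follow the same convention for a missing DW_AT_type: an enumeration without an underlying type becomes a signed integer sized from DW_AT_byte_size (or plain 'int' if there is no size either), and a subroutine without a return type returns 'void'. A sketch of the enum half, using the patch's own calls inside a hypothetical helper:

// Hypothetical helper mirroring the enum underlying-type fallback above.
CompilerType PickEnumUnderlyingType(ClangASTContext &ast,
                                    llvm::Optional<uint64_t> byte_size) {
  if (byte_size) // DW_AT_byte_size present: signed int of that width
    return ast.GetBuiltinTypeForDWARFEncodingAndBitSize(
        nullptr, DW_ATE_signed, *byte_size * 8);
  return ast.GetBasicType(eBasicTypeInt); // last resort: plain 'int'
}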
+  bool is_variadic = false;
+  bool is_static = false;
+  bool has_template_params = false;

-    std::vector<CompilerType> function_param_types;
-    std::vector<clang::ParmVarDecl *> function_param_decls;
+  unsigned type_quals = 0;

-    // Parse the function children for the parameters
+  std::string object_pointer_name;
+  if (attrs.object_pointer) {
+    const char *object_pointer_name_cstr = attrs.object_pointer.GetName();
+    if (object_pointer_name_cstr)
+      object_pointer_name = object_pointer_name_cstr;
+  }

-    DWARFDIE decl_ctx_die;
-    clang::DeclContext *containing_decl_ctx =
-        GetClangDeclContextContainingDIE(die, &decl_ctx_die);
-    const clang::Decl::Kind containing_decl_kind =
-        containing_decl_ctx->getDeclKind();
+  DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
+               DW_TAG_value_to_name(tag), type_name_cstr);

-    bool is_cxx_method = DeclKindIsCXXClass(containing_decl_kind);
-    // Start off static. This will be set to false in
-    // ParseChildParameters(...) if we find a "this" parameters as the
-    // first parameter
-    if (is_cxx_method) {
-      is_static = true;
-    }
+  CompilerType return_clang_type;
+  Type *func_type = NULL;
+
+  if (attrs.type.IsValid())
+    func_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true);
+
+  if (func_type)
+    return_clang_type = func_type->GetForwardCompilerType();
+  else
+    return_clang_type = m_ast.GetBasicType(eBasicTypeVoid);
+
+  std::vector<CompilerType> function_param_types;
+  std::vector<clang::ParmVarDecl *> function_param_decls;
+
+  // Parse the function children for the parameters
+
+  DWARFDIE decl_ctx_die;
+  clang::DeclContext *containing_decl_ctx =
+      GetClangDeclContextContainingDIE(die, &decl_ctx_die);
+  const clang::Decl::Kind containing_decl_kind =
+      containing_decl_ctx->getDeclKind();
+
+  bool is_cxx_method = DeclKindIsCXXClass(containing_decl_kind);
+  // Start off static. This will be set to false in
+  // ParseChildParameters(...) if we find a "this" parameter as the
+  // first parameter.
+  if (is_cxx_method) {
+    is_static = true;
+  }
+
+  if (die.HasChildren()) {
+    bool skip_artificial = true;
+    ParseChildParameters(containing_decl_ctx, die, skip_artificial, is_static,
+                         is_variadic, has_template_params,
+                         function_param_types, function_param_decls,
+                         type_quals);
+  }
+
+  bool ignore_containing_context = false;
+  // Check for templatized class member functions. If we had any
+  // DW_TAG_template_type_parameter or DW_TAG_template_value_parameter in
+  // the DW_TAG_subprogram DIE, then we can't let this become a method in
+  // a class. Why? Because templatized functions are only emitted if one
+  // of the templatized methods is used in the current compile unit, so
+  // we would end up with classes that may or may not include these member
+  // functions. One class definition then won't match another, which
+  // breaks our ability to use the class in the clang expression parser.
+  // So for the greater good, we currently must not allow any template
+  // member functions in a class definition.
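The rationale in the comment above, which the check below enforces, is easiest to see with a small example. The source here is mine, not from the patch:

// Hypothetical source illustrating why template members are skipped.
struct Widget {
  template <typename T> T as() const { return T(); }
  int value = 0;
};
// a.cpp calls w.as<int>()  -> DWARF for Widget includes as<int>.
// b.cpp never calls as()   -> DWARF for Widget has no 'as' member at all.
// Folding both into one clang class would give two disagreeing member
// lists for the "same" type, so template members are left out entirely.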
+ if (is_cxx_method && has_template_params) { + ignore_containing_context = true; + is_cxx_method = false; + } + + // clang_type will get the function prototype clang type after this + // call + CompilerType clang_type = m_ast.CreateFunctionType( + return_clang_type, function_param_types.data(), + function_param_types.size(), is_variadic, type_quals); - if (die.HasChildren()) { - bool skip_artificial = true; - ParseChildParameters(containing_decl_ctx, die, skip_artificial, is_static, - is_variadic, has_template_params, - function_param_types, function_param_decls, - type_quals); - } - - bool ignore_containing_context = false; - // Check for templatized class member functions. If we had any - // DW_TAG_template_type_parameter or DW_TAG_template_value_parameter - // the DW_TAG_subprogram DIE, then we can't let this become a method in - // a class. Why? Because templatized functions are only emitted if one - // of the templatized methods is used in the current compile unit and - // we will end up with classes that may or may not include these member - // functions and this means one class won't match another class - // definition and it affects our ability to use a class in the clang - // expression parser. So for the greater good, we currently must not - // allow any template member functions in a class definition. - if (is_cxx_method && has_template_params) { - ignore_containing_context = true; - is_cxx_method = false; - } - - // clang_type will get the function prototype clang type after this - // call - clang_type = m_ast.CreateFunctionType( - return_clang_type, function_param_types.data(), - function_param_types.size(), is_variadic, type_quals); - - if (attrs.name) { - bool type_handled = false; - if (tag == DW_TAG_subprogram || tag == DW_TAG_inlined_subroutine) { - ObjCLanguage::MethodName objc_method(attrs.name.GetStringRef(), true); - if (objc_method.IsValid(true)) { - CompilerType class_opaque_type; - ConstString class_name(objc_method.GetClassName()); - if (class_name) { - TypeSP complete_objc_class_type_sp( - dwarf->FindCompleteObjCDefinitionTypeForDIE(DWARFDIE(), - class_name, false)); - - if (complete_objc_class_type_sp) { - CompilerType type_clang_forward_type = - complete_objc_class_type_sp->GetForwardCompilerType(); - if (ClangASTContext::IsObjCObjectOrInterfaceType( - type_clang_forward_type)) - class_opaque_type = type_clang_forward_type; - } + if (attrs.name) { + bool type_handled = false; + if (tag == DW_TAG_subprogram || tag == DW_TAG_inlined_subroutine) { + ObjCLanguage::MethodName objc_method(attrs.name.GetStringRef(), true); + if (objc_method.IsValid(true)) { + CompilerType class_opaque_type; + ConstString class_name(objc_method.GetClassName()); + if (class_name) { + TypeSP complete_objc_class_type_sp( + dwarf->FindCompleteObjCDefinitionTypeForDIE(DWARFDIE(), + class_name, false)); + + if (complete_objc_class_type_sp) { + CompilerType type_clang_forward_type = + complete_objc_class_type_sp->GetForwardCompilerType(); + if (ClangASTContext::IsObjCObjectOrInterfaceType( + type_clang_forward_type)) + class_opaque_type = type_clang_forward_type; } + } - if (class_opaque_type) { - // If accessibility isn't set to anything valid, assume public - // for now... 
-            if (attrs.accessibility == eAccessNone)
-              attrs.accessibility = eAccessPublic;
-
-            clang::ObjCMethodDecl *objc_method_decl =
-                m_ast.AddMethodToObjCObjectType(
-                    class_opaque_type, attrs.name.GetCString(), clang_type,
-                    attrs.accessibility, attrs.is_artificial, is_variadic);
-            type_handled = objc_method_decl != NULL;
-            if (type_handled) {
-              LinkDeclContextToDIE(objc_method_decl, die);
-              m_ast.SetMetadataAsUserID(objc_method_decl, die.GetID());
-            } else {
-              dwarf->GetObjectFile()->GetModule()->ReportError(
-                  "{0x%8.8x}: invalid Objective-C method 0x%4.4x (%s), "
-                  "please file a bug and attach the file at the start of "
-                  "this error message",
-                  die.GetOffset(), tag, DW_TAG_value_to_name(tag));
-            }
+        if (class_opaque_type) {
+          // If accessibility isn't set to anything valid, assume public
+          // for now...
+          if (attrs.accessibility == eAccessNone)
+            attrs.accessibility = eAccessPublic;
+
+          clang::ObjCMethodDecl *objc_method_decl =
+              m_ast.AddMethodToObjCObjectType(
+                  class_opaque_type, attrs.name.GetCString(), clang_type,
+                  attrs.accessibility, attrs.is_artificial, is_variadic);
+          type_handled = objc_method_decl != NULL;
+          if (type_handled) {
+            LinkDeclContextToDIE(objc_method_decl, die);
+            m_ast.SetMetadataAsUserID(objc_method_decl, die.GetID());
+          } else {
+            dwarf->GetObjectFile()->GetModule()->ReportError(
+                "{0x%8.8x}: invalid Objective-C method 0x%4.4x (%s), "
+                "please file a bug and attach the file at the start of "
+                "this error message",
+                die.GetOffset(), tag, DW_TAG_value_to_name(tag));
          }
-          } else if (is_cxx_method) {
-            // Look at the parent of this DIE and see if is is a class or
-            // struct and see if this is actually a C++ method
-            Type *class_type = dwarf->ResolveType(decl_ctx_die);
-            if (class_type) {
-              bool alternate_defn = false;
-              if (class_type->GetID() != decl_ctx_die.GetID() ||
-                  IsClangModuleFwdDecl(decl_ctx_die)) {
-                alternate_defn = true;
-
-                // We uniqued the parent class of this function to another
-                // class so we now need to associate all dies under
-                // "decl_ctx_die" to DIEs in the DIE for "class_type"...
-                DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID());
-
-                if (class_type_die) {
-                  std::vector<DWARFDIE> failures;
-
-                  CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die,
-                                             class_type, failures);
-
-                  // FIXME do something with these failures that's
-                  // smarter than just dropping them on the ground.
-                  // Unfortunately classes don't like having stuff added
-                  // to them after their definitions are complete...
-
-                  type_ptr = dwarf->GetDIEToType()[die.GetDIE()];
-                  if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) {
-                    type_sp = type_ptr->shared_from_this();
-                    break;
-                  }
+        }
+      } else if (is_cxx_method) {
+        // Look at the parent of this DIE and see if it is a class or
+        // struct and see if this is actually a C++ method
+        Type *class_type = dwarf->ResolveType(decl_ctx_die);
+        if (class_type) {
+          bool alternate_defn = false;
+          if (class_type->GetID() != decl_ctx_die.GetID() ||
+              IsClangModuleFwdDecl(decl_ctx_die)) {
+            alternate_defn = true;
+
+            // We uniqued the parent class of this function to another
+            // class so we now need to associate all DIEs under
+            // "decl_ctx_die" to DIEs in the DIE for "class_type"...
+            DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID());
+
+            if (class_type_die) {
+              std::vector<DWARFDIE> failures;
+
+              CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die,
+                                         class_type, failures);
+
+              // FIXME do something with these failures that's
+              // smarter than just dropping them on the ground.
+              // Unfortunately classes don't like having stuff added
+              // to them after their definitions are complete...
+
+              Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()];
+              if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) {
+                return type_ptr->shared_from_this();
                }
              }
+          }

-            if (attrs.specification.IsValid()) {
-              // We have a specification which we are going to base our
-              // function prototype off of, so we need this type to be
-              // completed so that the m_die_to_decl_ctx for the method in
-              // the specification has a valid clang decl context.
-              class_type->GetForwardCompilerType();
-              // If we have a specification, then the function type should
-              // have been made with the specification and not with this
-              // die.
-              DWARFDIE spec_die = attrs.specification.Reference();
-              clang::DeclContext *spec_clang_decl_ctx =
-                  GetClangDeclContextForDIE(spec_die);
-              if (spec_clang_decl_ctx) {
-                LinkDeclContextToDIE(spec_clang_decl_ctx, die);
-              } else {
-                dwarf->GetObjectFile()->GetModule()->ReportWarning(
-                    "0x%8.8" PRIx64 ": DW_AT_specification(0x%8.8x"
-                    ") has no decl\n",
-                    die.GetID(), spec_die.GetOffset());
-              }
-              type_handled = true;
-            } else if (attrs.abstract_origin.IsValid()) {
-              // We have a specification which we are going to base our
-              // function prototype off of, so we need this type to be
-              // completed so that the m_die_to_decl_ctx for the method in
-              // the abstract origin has a valid clang decl context.
-              class_type->GetForwardCompilerType();
-
-              DWARFDIE abs_die = attrs.abstract_origin.Reference();
-              clang::DeclContext *abs_clang_decl_ctx =
-                  GetClangDeclContextForDIE(abs_die);
-              if (abs_clang_decl_ctx) {
-                LinkDeclContextToDIE(abs_clang_decl_ctx, die);
-              } else {
-                dwarf->GetObjectFile()->GetModule()->ReportWarning(
-                    "0x%8.8" PRIx64 ": DW_AT_abstract_origin(0x%8.8x"
-                    ") has no decl\n",
-                    die.GetID(), abs_die.GetOffset());
-              }
-              type_handled = true;
+        if (attrs.specification.IsValid()) {
+          // We have a specification which we are going to base our
+          // function prototype off of, so we need this type to be
+          // completed so that the m_die_to_decl_ctx for the method in
+          // the specification has a valid clang decl context.
+          class_type->GetForwardCompilerType();
+          // If we have a specification, then the function type should
+          // have been made with the specification and not with this
+          // die.
+          DWARFDIE spec_die = attrs.specification.Reference();
+          clang::DeclContext *spec_clang_decl_ctx =
+              GetClangDeclContextForDIE(spec_die);
+          if (spec_clang_decl_ctx) {
+            LinkDeclContextToDIE(spec_clang_decl_ctx, die);
+          } else {
+            dwarf->GetObjectFile()->GetModule()->ReportWarning(
+                "0x%8.8" PRIx64 ": DW_AT_specification(0x%8.8x"
+                ") has no decl\n",
+                die.GetID(), spec_die.GetOffset());
+          }
+          type_handled = true;
+        } else if (attrs.abstract_origin.IsValid()) {
+          // We have an abstract origin which we are going to base our
+          // function prototype on, so we need this type to be
+          // completed so that the m_die_to_decl_ctx for the method in
+          // the abstract origin has a valid clang decl context.
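For reference, the two attributes handled above arise from the usual declaration/definition split in the source. The example is mine, not from the patch:

// Hypothetical source showing where these attributes come from.
struct S {
  void f(); // in-class declaration: a DW_TAG_subprogram inside the class DIE
};
void S::f() {} // out-of-line definition: DW_TAG_subprogram carrying
               // DW_AT_specification -> the in-class declaration DIE
inline void g() {} // each inlined copy gets a DW_TAG_inlined_subroutine
                   // carrying DW_AT_abstract_origin -> g's abstract DIE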
+ class_type->GetForwardCompilerType(); + + DWARFDIE abs_die = attrs.abstract_origin.Reference(); + clang::DeclContext *abs_clang_decl_ctx = + GetClangDeclContextForDIE(abs_die); + if (abs_clang_decl_ctx) { + LinkDeclContextToDIE(abs_clang_decl_ctx, die); } else { - CompilerType class_opaque_type = - class_type->GetForwardCompilerType(); - if (ClangASTContext::IsCXXClassType(class_opaque_type)) { - if (class_opaque_type.IsBeingDefined() || alternate_defn) { - if (!is_static && !die.HasChildren()) { - // We have a C++ member function with no children (this - // pointer!) and clang will get mad if we try and make - // a function that isn't well formed in the DWARF, so - // we will just skip it... - type_handled = true; - } else { - bool add_method = true; - if (alternate_defn) { - // If an alternate definition for the class exists, - // then add the method only if an equivalent is not - // already present. - clang::CXXRecordDecl *record_decl = - m_ast.GetAsCXXRecordDecl( - class_opaque_type.GetOpaqueQualType()); - if (record_decl) { - for (auto method_iter = record_decl->method_begin(); - method_iter != record_decl->method_end(); - method_iter++) { - clang::CXXMethodDecl *method_decl = *method_iter; - if (method_decl->getNameInfo().getAsString() == - attrs.name.GetStringRef()) { - if (method_decl->getType() == - ClangUtil::GetQualType(clang_type)) { - add_method = false; - LinkDeclContextToDIE(method_decl, die); - type_handled = true; - - break; - } + dwarf->GetObjectFile()->GetModule()->ReportWarning( + "0x%8.8" PRIx64 ": DW_AT_abstract_origin(0x%8.8x" + ") has no decl\n", + die.GetID(), abs_die.GetOffset()); + } + type_handled = true; + } else { + CompilerType class_opaque_type = + class_type->GetForwardCompilerType(); + if (ClangASTContext::IsCXXClassType(class_opaque_type)) { + if (class_opaque_type.IsBeingDefined() || alternate_defn) { + if (!is_static && !die.HasChildren()) { + // We have a C++ member function with no children (this + // pointer!) and clang will get mad if we try and make + // a function that isn't well formed in the DWARF, so + // we will just skip it... + type_handled = true; + } else { + bool add_method = true; + if (alternate_defn) { + // If an alternate definition for the class exists, + // then add the method only if an equivalent is not + // already present. + clang::CXXRecordDecl *record_decl = + m_ast.GetAsCXXRecordDecl( + class_opaque_type.GetOpaqueQualType()); + if (record_decl) { + for (auto method_iter = record_decl->method_begin(); + method_iter != record_decl->method_end(); + method_iter++) { + clang::CXXMethodDecl *method_decl = *method_iter; + if (method_decl->getNameInfo().getAsString() == + attrs.name.GetStringRef()) { + if (method_decl->getType() == + ClangUtil::GetQualType(clang_type)) { + add_method = false; + LinkDeclContextToDIE(method_decl, die); + type_handled = true; + + break; } } } } + } - if (add_method) { - llvm::PrettyStackTraceFormat stack_trace( - "SymbolFileDWARF::ParseType() is adding a method " - "%s to class %s in DIE 0x%8.8" PRIx64 " from %s", - attrs.name.GetCString(), - class_type->GetName().GetCString(), die.GetID(), - dwarf->GetObjectFile() - ->GetFileSpec() - .GetPath() - .c_str()); - - const bool is_attr_used = false; - // Neither GCC 4.2 nor clang++ currently set a valid - // accessibility in the DWARF for C++ methods... - // Default to public for now... 
- if (attrs.accessibility == eAccessNone) - attrs.accessibility = eAccessPublic; - - clang::CXXMethodDecl *cxx_method_decl = - m_ast.AddMethodToCXXRecordType( - class_opaque_type.GetOpaqueQualType(), - attrs.name.GetCString(), attrs.mangled_name, - clang_type, attrs.accessibility, attrs.is_virtual, - is_static, attrs.is_inline, attrs.is_explicit, - is_attr_used, attrs.is_artificial); - - type_handled = cxx_method_decl != NULL; - // Artificial methods are always handled even when we - // don't create a new declaration for them. - type_handled |= attrs.is_artificial; - - if (cxx_method_decl) { - LinkDeclContextToDIE(cxx_method_decl, die); - - ClangASTMetadata metadata; - metadata.SetUserID(die.GetID()); - - if (!object_pointer_name.empty()) { - metadata.SetObjectPtrName( - object_pointer_name.c_str()); - LLDB_LOGF(log, - "Setting object pointer name: %s on method " - "object %p.\n", - object_pointer_name.c_str(), - static_cast(cxx_method_decl)); - } - m_ast.SetMetadata(cxx_method_decl, metadata); - } else { - ignore_containing_context = true; + if (add_method) { + llvm::PrettyStackTraceFormat stack_trace( + "SymbolFileDWARF::ParseType() is adding a method " + "%s to class %s in DIE 0x%8.8" PRIx64 " from %s", + attrs.name.GetCString(), + class_type->GetName().GetCString(), die.GetID(), + dwarf->GetObjectFile() + ->GetFileSpec() + .GetPath() + .c_str()); + + const bool is_attr_used = false; + // Neither GCC 4.2 nor clang++ currently set a valid + // accessibility in the DWARF for C++ methods... + // Default to public for now... + if (attrs.accessibility == eAccessNone) + attrs.accessibility = eAccessPublic; + + clang::CXXMethodDecl *cxx_method_decl = + m_ast.AddMethodToCXXRecordType( + class_opaque_type.GetOpaqueQualType(), + attrs.name.GetCString(), attrs.mangled_name, + clang_type, attrs.accessibility, attrs.is_virtual, + is_static, attrs.is_inline, attrs.is_explicit, + is_attr_used, attrs.is_artificial); + + type_handled = cxx_method_decl != NULL; + // Artificial methods are always handled even when we + // don't create a new declaration for them. + type_handled |= attrs.is_artificial; + + if (cxx_method_decl) { + LinkDeclContextToDIE(cxx_method_decl, die); + + ClangASTMetadata metadata; + metadata.SetUserID(die.GetID()); + + if (!object_pointer_name.empty()) { + metadata.SetObjectPtrName( + object_pointer_name.c_str()); + LLDB_LOGF(log, + "Setting object pointer name: %s on method " + "object %p.\n", + object_pointer_name.c_str(), + static_cast(cxx_method_decl)); } + m_ast.SetMetadata(cxx_method_decl, metadata); + } else { + ignore_containing_context = true; } } - } else { - // We were asked to parse the type for a method in a - // class, yet the class hasn't been asked to complete - // itself through the clang::ExternalASTSource protocol, - // so we need to just have the class complete itself and - // do things the right way, then our - // DIE should then have an entry in the - // dwarf->GetDIEToType() map. First - // we need to modify the dwarf->GetDIEToType() so it - // doesn't think we are trying to parse this DIE - // anymore... - dwarf->GetDIEToType()[die.GetDIE()] = NULL; - - // Now we get the full type to force our class type to - // complete itself using the clang::ExternalASTSource - // protocol which will parse all base classes and all - // methods (including the method for this DIE). 
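The GetDIEToType() bookkeeping described above relies on a sentinel value to detect recursion: while a DIE is on the parse stack its map slot holds DIE_IS_BEING_PARSED, and clearing the slot to NULL (as the code above does) deliberately forces a fresh parse during the class completion that GetFullCompilerType() triggers. A stripped-down sketch of the lookup side; the map shape and helper are hypothetical stand-ins:

// Hypothetical sketch of the sentinel-based recursion guard.
Type *LookupParsedType(llvm::DenseMap<const void *, Type *> &die_to_type,
                       const void *die, Type *sentinel /*DIE_IS_BEING_PARSED*/) {
  Type *type = die_to_type.lookup(die);
  if (type == sentinel) // this DIE is being parsed right now; don't recurse
    return nullptr;
  return type; // nullptr if never parsed, otherwise the finished type
}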
- class_type->GetFullCompilerType(); - - // The type for this DIE should have been filled in the - // function call above - type_ptr = dwarf->GetDIEToType()[die.GetDIE()]; - if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) { - type_sp = type_ptr->shared_from_this(); - break; - } - - // FIXME This is fixing some even uglier behavior but we - // really need to - // uniq the methods of each class as well as the class - // itself. - type_handled = true; } + } else { + // We were asked to parse the type for a method in a + // class, yet the class hasn't been asked to complete + // itself through the clang::ExternalASTSource protocol, + // so we need to just have the class complete itself and + // do things the right way, then our + // DIE should then have an entry in the + // dwarf->GetDIEToType() map. First + // we need to modify the dwarf->GetDIEToType() so it + // doesn't think we are trying to parse this DIE + // anymore... + dwarf->GetDIEToType()[die.GetDIE()] = NULL; + + // Now we get the full type to force our class type to + // complete itself using the clang::ExternalASTSource + // protocol which will parse all base classes and all + // methods (including the method for this DIE). + class_type->GetFullCompilerType(); + + // The type for this DIE should have been filled in the + // function call above + Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()]; + if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) { + return type_ptr->shared_from_this(); + } + + // FIXME This is fixing some even uglier behavior but we + // really need to + // uniq the methods of each class as well as the class + // itself. + type_handled = true; } } } } } + } - if (!type_handled) { - clang::FunctionDecl *function_decl = nullptr; - clang::FunctionDecl *template_function_decl = nullptr; + if (!type_handled) { + clang::FunctionDecl *function_decl = nullptr; + clang::FunctionDecl *template_function_decl = nullptr; - if (attrs.abstract_origin.IsValid()) { - DWARFDIE abs_die = attrs.abstract_origin.Reference(); + if (attrs.abstract_origin.IsValid()) { + DWARFDIE abs_die = attrs.abstract_origin.Reference(); - if (dwarf->ResolveType(abs_die)) { - function_decl = llvm::dyn_cast_or_null( - GetCachedClangDeclContextForDIE(abs_die)); + if (dwarf->ResolveType(abs_die)) { + function_decl = llvm::dyn_cast_or_null( + GetCachedClangDeclContextForDIE(abs_die)); - if (function_decl) { - LinkDeclContextToDIE(function_decl, die); - } + if (function_decl) { + LinkDeclContextToDIE(function_decl, die); } } + } - if (!function_decl) { - // We just have a function that isn't part of a class - function_decl = m_ast.CreateFunctionDeclaration( + if (!function_decl) { + // We just have a function that isn't part of a class + function_decl = m_ast.CreateFunctionDeclaration( + ignore_containing_context ? m_ast.GetTranslationUnitDecl() + : containing_decl_ctx, + attrs.name.GetCString(), clang_type, attrs.storage, + attrs.is_inline); + + if (has_template_params) { + ClangASTContext::TemplateParameterInfos template_param_infos; + ParseTemplateParameterInfos(die, template_param_infos); + template_function_decl = m_ast.CreateFunctionDeclaration( ignore_containing_context ? 
m_ast.GetTranslationUnitDecl() : containing_decl_ctx, attrs.name.GetCString(), clang_type, attrs.storage, attrs.is_inline); + clang::FunctionTemplateDecl *func_template_decl = + m_ast.CreateFunctionTemplateDecl( + containing_decl_ctx, template_function_decl, + attrs.name.GetCString(), template_param_infos); + m_ast.CreateFunctionTemplateSpecializationInfo( + function_decl, func_template_decl, template_param_infos); + } - if (has_template_params) { - ClangASTContext::TemplateParameterInfos template_param_infos; - ParseTemplateParameterInfos(die, template_param_infos); - template_function_decl = m_ast.CreateFunctionDeclaration( - ignore_containing_context ? m_ast.GetTranslationUnitDecl() - : containing_decl_ctx, - attrs.name.GetCString(), clang_type, attrs.storage, - attrs.is_inline); - clang::FunctionTemplateDecl *func_template_decl = - m_ast.CreateFunctionTemplateDecl( - containing_decl_ctx, template_function_decl, - attrs.name.GetCString(), template_param_infos); - m_ast.CreateFunctionTemplateSpecializationInfo( - function_decl, func_template_decl, template_param_infos); - } + lldbassert(function_decl); - lldbassert(function_decl); + if (function_decl) { + LinkDeclContextToDIE(function_decl, die); - if (function_decl) { - LinkDeclContextToDIE(function_decl, die); - - if (!function_param_decls.empty()) { - m_ast.SetFunctionParameters(function_decl, + if (!function_param_decls.empty()) { + m_ast.SetFunctionParameters(function_decl, + &function_param_decls.front(), + function_param_decls.size()); + if (template_function_decl) + m_ast.SetFunctionParameters(template_function_decl, &function_param_decls.front(), function_param_decls.size()); - if (template_function_decl) - m_ast.SetFunctionParameters(template_function_decl, - &function_param_decls.front(), - function_param_decls.size()); - } + } - ClangASTMetadata metadata; - metadata.SetUserID(die.GetID()); + ClangASTMetadata metadata; + metadata.SetUserID(die.GetID()); - if (!object_pointer_name.empty()) { - metadata.SetObjectPtrName(object_pointer_name.c_str()); - LLDB_LOGF(log, - "Setting object pointer name: %s on function " - "object %p.", - object_pointer_name.c_str(), - static_cast(function_decl)); - } - m_ast.SetMetadata(function_decl, metadata); + if (!object_pointer_name.empty()) { + metadata.SetObjectPtrName(object_pointer_name.c_str()); + LLDB_LOGF(log, + "Setting object pointer name: %s on function " + "object %p.", + object_pointer_name.c_str(), + static_cast(function_decl)); } + m_ast.SetMetadata(function_decl, metadata); } } } - type_sp = std::make_shared( - die.GetID(), dwarf, attrs.name, llvm::None, nullptr, LLDB_INVALID_UID, - Type::eEncodingIsUID, &attrs.decl, clang_type, Type::ResolveState::Full); - assert(type_sp.get()); - } break; + } + return std::make_shared( + die.GetID(), dwarf, attrs.name, llvm::None, nullptr, LLDB_INVALID_UID, + Type::eEncodingIsUID, &attrs.decl, clang_type, Type::ResolveState::Full); +} - case DW_TAG_array_type: { - DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), - DW_TAG_value_to_name(tag), type_name_cstr); +TypeSP DWARFASTParserClang::ParseArrayType(const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs) { + SymbolFileDWARF *dwarf = die.GetDWARF(); - DWARFDIE type_die = attrs.type.Reference(); - Type *element_type = dwarf->ResolveTypeUID(type_die, true); + DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), + DW_TAG_value_to_name(tag), type_name_cstr); - if (element_type) { - auto array_info = ParseChildArrayInfo(die); - if (array_info) { - attrs.byte_stride = 
array_info->byte_stride; - attrs.bit_stride = array_info->bit_stride; - } - if (attrs.byte_stride == 0 && attrs.bit_stride == 0) - attrs.byte_stride = element_type->GetByteSize().getValueOr(0); - CompilerType array_element_type = element_type->GetForwardCompilerType(); - - if (ClangASTContext::IsCXXClassType(array_element_type) && - !array_element_type.GetCompleteType()) { - ModuleSP module_sp = die.GetModule(); - if (module_sp) { - if (die.GetCU()->GetProducer() == eProducerClang) - module_sp->ReportError( - "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " - "class/union/struct element type DIE 0x%8.8x that is a " - "forward declaration, not a complete definition.\nTry " - "compiling the source file with -fstandalone-debug or " - "disable -gmodules", - die.GetOffset(), type_die.GetOffset()); - else - module_sp->ReportError( - "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " - "class/union/struct element type DIE 0x%8.8x that is a " - "forward declaration, not a complete definition.\nPlease " - "file a bug against the compiler and include the " - "preprocessed output for %s", - die.GetOffset(), type_die.GetOffset(), - GetUnitName(die).c_str()); - } + DWARFDIE type_die = attrs.type.Reference(); + Type *element_type = dwarf->ResolveTypeUID(type_die, true); - // We have no choice other than to pretend that the element class - // type is complete. If we don't do this, clang will crash when - // trying to layout the class. Since we provide layout - // assistance, all ivars in this class and other classes will be - // fine, this is the best we can do short of crashing. - if (ClangASTContext::StartTagDeclarationDefinition( - array_element_type)) { - ClangASTContext::CompleteTagDeclarationDefinition(array_element_type); - } else { - module_sp->ReportError("DWARF DIE at 0x%8.8x was not able to " - "start its definition.\nPlease file a " - "bug and attach the file at the start " - "of this error message", - type_die.GetOffset()); - } - } + if (!element_type) + return nullptr; - uint64_t array_element_bit_stride = - attrs.byte_stride * 8 + attrs.bit_stride; - if (array_info && array_info->element_orders.size() > 0) { - uint64_t num_elements = 0; - auto end = array_info->element_orders.rend(); - for (auto pos = array_info->element_orders.rbegin(); pos != end; - ++pos) { - num_elements = *pos; - clang_type = m_ast.CreateArrayType(array_element_type, num_elements, - attrs.is_vector); - array_element_type = clang_type; - array_element_bit_stride = - num_elements ? 
array_element_bit_stride * num_elements - : array_element_bit_stride; - } - } else { - clang_type = m_ast.CreateArrayType(array_element_type, 0, attrs.is_vector); - } - ConstString empty_name; - type_sp = std::make_shared( - die.GetID(), dwarf, empty_name, array_element_bit_stride / 8, nullptr, - dwarf->GetUID(type_die), Type::eEncodingIsUID, &attrs.decl, - clang_type, Type::ResolveState::Full); - type_sp->SetEncodingType(element_type); - m_ast.SetMetadataAsUserID(clang_type.GetOpaqueQualType(), die.GetID()); + llvm::Optional array_info = ParseChildArrayInfo(die); + if (array_info) { + attrs.byte_stride = array_info->byte_stride; + attrs.bit_stride = array_info->bit_stride; + } + if (attrs.byte_stride == 0 && attrs.bit_stride == 0) + attrs.byte_stride = element_type->GetByteSize().getValueOr(0); + CompilerType array_element_type = element_type->GetForwardCompilerType(); + + if (ClangASTContext::IsCXXClassType(array_element_type) && + !array_element_type.GetCompleteType()) { + ModuleSP module_sp = die.GetModule(); + if (module_sp) { + if (die.GetCU()->GetProducer() == eProducerClang) + module_sp->ReportError( + "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " + "class/union/struct element type DIE 0x%8.8x that is a " + "forward declaration, not a complete definition.\nTry " + "compiling the source file with -fstandalone-debug or " + "disable -gmodules", + die.GetOffset(), type_die.GetOffset()); + else + module_sp->ReportError( + "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " + "class/union/struct element type DIE 0x%8.8x that is a " + "forward declaration, not a complete definition.\nPlease " + "file a bug against the compiler and include the " + "preprocessed output for %s", + die.GetOffset(), type_die.GetOffset(), GetUnitName(die).c_str()); + } + + // We have no choice other than to pretend that the element class + // type is complete. If we don't do this, clang will crash when + // trying to layout the class. Since we provide layout + // assistance, all ivars in this class and other classes will be + // fine, this is the best we can do short of crashing. + if (ClangASTContext::StartTagDeclarationDefinition(array_element_type)) { + ClangASTContext::CompleteTagDeclarationDefinition(array_element_type); + } else { + module_sp->ReportError("DWARF DIE at 0x%8.8x was not able to " + "start its definition.\nPlease file a " + "bug and attach the file at the start " + "of this error message", + type_die.GetOffset()); } - } break; + } - case DW_TAG_ptr_to_member_type: { - Type *pointee_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); - Type *class_type = - dwarf->ResolveTypeUID(attrs.containing_type.Reference(), true); + uint64_t array_element_bit_stride = + attrs.byte_stride * 8 + attrs.bit_stride; + CompilerType clang_type; + if (array_info && array_info->element_orders.size() > 0) { + uint64_t num_elements = 0; + auto end = array_info->element_orders.rend(); + for (auto pos = array_info->element_orders.rbegin(); pos != end; ++pos) { + num_elements = *pos; + clang_type = m_ast.CreateArrayType(array_element_type, num_elements, + attrs.is_vector); + array_element_type = clang_type; + array_element_bit_stride = num_elements + ? 
array_element_bit_stride * num_elements + : array_element_bit_stride; + } + } else { + clang_type = + m_ast.CreateArrayType(array_element_type, 0, attrs.is_vector); + } + ConstString empty_name; + TypeSP type_sp = std::make_shared( + die.GetID(), dwarf, empty_name, array_element_bit_stride / 8, nullptr, + dwarf->GetUID(type_die), Type::eEncodingIsUID, &attrs.decl, clang_type, + Type::ResolveState::Full); + type_sp->SetEncodingType(element_type); + m_ast.SetMetadataAsUserID(clang_type.GetOpaqueQualType(), die.GetID()); + return type_sp; +} - CompilerType pointee_clang_type = pointee_type->GetForwardCompilerType(); - CompilerType class_clang_type = class_type->GetLayoutCompilerType(); +TypeSP DWARFASTParserClang::ParsePointerToMemberType( + const DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs) { + SymbolFileDWARF *dwarf = die.GetDWARF(); + Type *pointee_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); + Type *class_type = + dwarf->ResolveTypeUID(attrs.containing_type.Reference(), true); - clang_type = ClangASTContext::CreateMemberPointerType(class_clang_type, - pointee_clang_type); + CompilerType pointee_clang_type = pointee_type->GetForwardCompilerType(); + CompilerType class_clang_type = class_type->GetLayoutCompilerType(); - if (llvm::Optional clang_type_size = - clang_type.GetByteSize(nullptr)) { - type_sp = std::make_shared( - die.GetID(), dwarf, attrs.name, *clang_type_size, nullptr, - LLDB_INVALID_UID, Type::eEncodingIsUID, nullptr, clang_type, - Type::ResolveState::Forward); - } + CompilerType clang_type = ClangASTContext::CreateMemberPointerType( + class_clang_type, pointee_clang_type); - break; + if (llvm::Optional clang_type_size = + clang_type.GetByteSize(nullptr)) { + return std::make_shared(die.GetID(), dwarf, attrs.name, + *clang_type_size, nullptr, LLDB_INVALID_UID, + Type::eEncodingIsUID, nullptr, clang_type, + Type::ResolveState::Forward); } - default: - dwarf->GetObjectFile()->GetModule()->ReportError( - "{0x%8.8x}: unhandled type tag 0x%4.4x (%s), please file a bug and " - "attach the file at the start of this error message", - die.GetOffset(), tag, DW_TAG_value_to_name(tag)); - break; - } - - // TODO: We should consider making the switch above exhaustive to simplify - // control flow in ParseTypeFromDWARF. Then, we could simply replace this - // return statement with a call to llvm_unreachable. 
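ParseArrayType above builds multidimensional arrays inside out: the dimensions are walked in reverse so the innermost element type is wrapped first, and the bit stride is scaled up at each level. A simplified sketch under those assumptions; the helper is mine, the CreateArrayType call is the patch's own:

// Hypothetical helper mirroring the inside-out construction above.
// For 'int a[2][3]', element_orders is {2, 3}; walking it in reverse wraps
// int -> int[3] -> int[2][3], scaling the bit stride at each level.
CompilerType BuildNestedArrayType(ClangASTContext &ast, CompilerType element,
                                  llvm::ArrayRef<uint64_t> element_orders,
                                  bool is_vector, uint64_t &bit_stride) {
  for (uint64_t num_elements : llvm::reverse(element_orders)) {
    element = ast.CreateArrayType(element, num_elements, is_vector);
    if (num_elements) // a zero count (incomplete array) leaves the stride alone
      bit_stride *= num_elements;
  }
  return element;
}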
- return UpdateSymbolContextScopeForType(sc, die, type_sp); + return nullptr; } TypeSP DWARFASTParserClang::UpdateSymbolContextScopeForType( @@ -1330,20 +1369,20 @@ TypeSP DWARFASTParserClang::UpdateSymbolContextScopeForType( DWARFDIE sc_parent_die = SymbolFileDWARF::GetParentSymbolContextDIE(die); dw_tag_t sc_parent_tag = sc_parent_die.Tag(); - SymbolContextScope *symbol_context_scope = NULL; + SymbolContextScope *symbol_context_scope = nullptr; if (sc_parent_tag == DW_TAG_compile_unit || sc_parent_tag == DW_TAG_partial_unit) { symbol_context_scope = sc.comp_unit; - } else if (sc.function != NULL && sc_parent_die) { + } else if (sc.function != nullptr && sc_parent_die) { symbol_context_scope = sc.function->GetBlock(true).FindBlockByID(sc_parent_die.GetID()); - if (symbol_context_scope == NULL) + if (symbol_context_scope == nullptr) symbol_context_scope = sc.function; } else { symbol_context_scope = sc.module_sp.get(); } - if (symbol_context_scope != NULL) + if (symbol_context_scope != nullptr) type_sp->SetSymbolContextScope(symbol_context_scope); // We are ready to put this type into the uniqued list up at the module @@ -1930,300 +1969,214 @@ bool DWARFASTParserClang::ParseTemplateParameterInfos( return template_param_infos.args.size() == template_param_infos.names.size(); } -bool DWARFASTParserClang::CompleteTypeFromDWARF(const DWARFDIE &die, - lldb_private::Type *type, - CompilerType &clang_type) { +bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die, + lldb_private::Type *type, + CompilerType &clang_type) { + const dw_tag_t tag = die.Tag(); SymbolFileDWARF *dwarf = die.GetDWARF(); - std::lock_guard guard( - dwarf->GetObjectFile()->GetModule()->GetMutex()); + ClangASTImporter::LayoutInfo layout_info; - // Disable external storage for this type so we don't get anymore - // clang::ExternalASTSource queries for this type. - m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), false); - - if (!die) - return false; - -#if defined LLDB_CONFIGURATION_DEBUG - // For debugging purposes, the LLDB_DWARF_DONT_COMPLETE_TYPENAMES environment - // variable can be set with one or more typenames separated by ';' - // characters. This will cause this function to not complete any types whose - // names match. - // - // Examples of setting this environment variable: - // - // LLDB_DWARF_DONT_COMPLETE_TYPENAMES=Foo - // LLDB_DWARF_DONT_COMPLETE_TYPENAMES=Foo;Bar;Baz - const char *dont_complete_typenames_cstr = - getenv("LLDB_DWARF_DONT_COMPLETE_TYPENAMES"); - if (dont_complete_typenames_cstr && dont_complete_typenames_cstr[0]) { - const char *die_name = die.GetName(); - if (die_name && die_name[0]) { - const char *match = strstr(dont_complete_typenames_cstr, die_name); - if (match) { - size_t die_name_length = strlen(die_name); - while (match) { - const char separator_char = ';'; - const char next_char = match[die_name_length]; - if (next_char == '\0' || next_char == separator_char) { - if (match == dont_complete_typenames_cstr || - match[-1] == separator_char) - return false; - } - match = strstr(match + 1, die_name); - } + { + if (die.HasChildren()) { + LanguageType class_language = eLanguageTypeUnknown; + if (ClangASTContext::IsObjCObjectOrInterfaceType(clang_type)) { + class_language = eLanguageTypeObjC; + // For objective C we don't start the definition when the class is + // created. 
+ ClangASTContext::StartTagDeclarationDefinition(clang_type); } - } - } -#endif - const dw_tag_t tag = die.Tag(); - - Log *log = - nullptr; // (LogChannelDWARF::GetLogIfAny(DWARF_LOG_DEBUG_INFO|DWARF_LOG_TYPE_COMPLETION)); - if (log) - dwarf->GetObjectFile()->GetModule()->LogMessageVerboseBacktrace( - log, "0x%8.8" PRIx64 ": %s '%s' resolving forward declaration...", - die.GetID(), die.GetTagAsCString(), type->GetName().AsCString()); - assert(clang_type); - DWARFAttributes attributes; - switch (tag) { - case DW_TAG_structure_type: - case DW_TAG_union_type: - case DW_TAG_class_type: { - ClangASTImporter::LayoutInfo layout_info; - - { - if (die.HasChildren()) { - LanguageType class_language = eLanguageTypeUnknown; - if (ClangASTContext::IsObjCObjectOrInterfaceType(clang_type)) { - class_language = eLanguageTypeObjC; - // For objective C we don't start the definition when the class is - // created. - ClangASTContext::StartTagDeclarationDefinition(clang_type); - } - - int tag_decl_kind = -1; - AccessType default_accessibility = eAccessNone; - if (tag == DW_TAG_structure_type) { - tag_decl_kind = clang::TTK_Struct; - default_accessibility = eAccessPublic; - } else if (tag == DW_TAG_union_type) { - tag_decl_kind = clang::TTK_Union; - default_accessibility = eAccessPublic; - } else if (tag == DW_TAG_class_type) { - tag_decl_kind = clang::TTK_Class; - default_accessibility = eAccessPrivate; - } - - std::vector> bases; - std::vector member_accessibilities; - bool is_a_class = false; - // Parse members and base classes first - std::vector member_function_dies; - - DelayedPropertyList delayed_properties; - ParseChildMembers(die, clang_type, class_language, bases, - member_accessibilities, member_function_dies, - delayed_properties, default_accessibility, is_a_class, - layout_info); - - // Now parse any methods if there were any... - for (const DWARFDIE &die : member_function_dies) - dwarf->ResolveType(die); - - if (class_language == eLanguageTypeObjC) { - ConstString class_name(clang_type.GetTypeName()); - if (class_name) { - DIEArray method_die_offsets; - dwarf->GetObjCMethodDIEOffsets(class_name, method_die_offsets); - - if (!method_die_offsets.empty()) { - DWARFDebugInfo *debug_info = dwarf->DebugInfo(); - - const size_t num_matches = method_die_offsets.size(); - for (size_t i = 0; i < num_matches; ++i) { - const DIERef &die_ref = method_die_offsets[i]; - DWARFDIE method_die = debug_info->GetDIE(die_ref); - - if (method_die) - method_die.ResolveType(); - } - } - - for (DelayedPropertyList::iterator pi = delayed_properties.begin(), - pe = delayed_properties.end(); - pi != pe; ++pi) - pi->Finalize(); - } - } - - // If we have a DW_TAG_structure_type instead of a DW_TAG_class_type we - // need to tell the clang type it is actually a class. - if (class_language != eLanguageTypeObjC) { - if (is_a_class && tag_decl_kind != clang::TTK_Class) - m_ast.SetTagTypeKind(ClangUtil::GetQualType(clang_type), - clang::TTK_Class); - } - - // Since DW_TAG_structure_type gets used for both classes and - // structures, we may need to set any DW_TAG_member fields to have a - // "private" access if none was specified. When we parsed the child - // members we tracked that actual accessibility value for each - // DW_TAG_member in the "member_accessibilities" array. If the value - // for the member is zero, then it was set to the - // "default_accessibility" which for structs was "public". Below we - // correct this by setting any fields to "private" that weren't - // correctly set. 
- if (is_a_class && !member_accessibilities.empty()) { - // This is a class and all members that didn't have their access - // specified are private. - m_ast.SetDefaultAccessForRecordFields( - m_ast.GetAsRecordDecl(clang_type), eAccessPrivate, - &member_accessibilities.front(), member_accessibilities.size()); - } + int tag_decl_kind = -1; + AccessType default_accessibility = eAccessNone; + if (tag == DW_TAG_structure_type) { + tag_decl_kind = clang::TTK_Struct; + default_accessibility = eAccessPublic; + } else if (tag == DW_TAG_union_type) { + tag_decl_kind = clang::TTK_Union; + default_accessibility = eAccessPublic; + } else if (tag == DW_TAG_class_type) { + tag_decl_kind = clang::TTK_Class; + default_accessibility = eAccessPrivate; + } - if (!bases.empty()) { - // Make sure all base classes refer to complete types and not forward - // declarations. If we don't do this, clang will crash with an - // assertion in the call to clang_type.TransferBaseClasses() - for (const auto &base_class : bases) { - clang::TypeSourceInfo *type_source_info = - base_class->getTypeSourceInfo(); - if (type_source_info) { - CompilerType base_class_type( - &m_ast, type_source_info->getType().getAsOpaquePtr()); - if (!base_class_type.GetCompleteType()) { - auto module = dwarf->GetObjectFile()->GetModule(); - module->ReportError(":: Class '%s' has a base class '%s' which " - "does not have a complete definition.", - die.GetName(), - base_class_type.GetTypeName().GetCString()); - if (die.GetCU()->GetProducer() == eProducerClang) - module->ReportError(":: Try compiling the source file with " - "-fstandalone-debug."); - - // We have no choice other than to pretend that the base class - // is complete. If we don't do this, clang will crash when we - // call setBases() inside of - // "clang_type.TransferBaseClasses()" below. Since we - // provide layout assistance, all ivars in this class and other - // classes will be fine, this is the best we can do short of - // crashing. - if (ClangASTContext::StartTagDeclarationDefinition( - base_class_type)) { - ClangASTContext::CompleteTagDeclarationDefinition( - base_class_type); - } - } + std::vector> bases; + std::vector member_accessibilities; + bool is_a_class = false; + // Parse members and base classes first + std::vector member_function_dies; + + DelayedPropertyList delayed_properties; + ParseChildMembers(die, clang_type, class_language, bases, + member_accessibilities, member_function_dies, + delayed_properties, default_accessibility, is_a_class, + layout_info); + + // Now parse any methods if there were any... 
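As the comment above says, record completion is deliberately two-phase: ParseChildMembers lays down fields and bases while only collecting the method DIEs, and the next few lines then resolve those methods once the record shape exists. In outline (not literal patch code):

// Phase one: members and bases; method DIEs are merely collected.
std::vector<DWARFDIE> member_function_dies;
ParseChildMembers(die, clang_type, class_language, bases,
                  member_accessibilities, member_function_dies,
                  delayed_properties, default_accessibility, is_a_class,
                  layout_info);
// Phase two: only now is it safe to build method decls on the record.
for (const DWARFDIE &method_die : member_function_dies)
  dwarf->ResolveType(method_die);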
+ for (const DWARFDIE &die : member_function_dies) + dwarf->ResolveType(die); + + if (class_language == eLanguageTypeObjC) { + ConstString class_name(clang_type.GetTypeName()); + if (class_name) { + DIEArray method_die_offsets; + dwarf->GetObjCMethodDIEOffsets(class_name, method_die_offsets); + + if (!method_die_offsets.empty()) { + DWARFDebugInfo *debug_info = dwarf->DebugInfo(); + + const size_t num_matches = method_die_offsets.size(); + for (size_t i = 0; i < num_matches; ++i) { + const DIERef &die_ref = method_die_offsets[i]; + DWARFDIE method_die = debug_info->GetDIE(die_ref); + + if (method_die) + method_die.ResolveType(); } } - m_ast.TransferBaseClasses(clang_type.GetOpaqueQualType(), - std::move(bases)); + for (DelayedPropertyList::iterator pi = delayed_properties.begin(), + pe = delayed_properties.end(); + pi != pe; ++pi) + pi->Finalize(); } } - } - m_ast.AddMethodOverridesForCXXRecordType(clang_type.GetOpaqueQualType()); - ClangASTContext::BuildIndirectFields(clang_type); - ClangASTContext::CompleteTagDeclarationDefinition(clang_type); - - if (!layout_info.field_offsets.empty() || - !layout_info.base_offsets.empty() || - !layout_info.vbase_offsets.empty()) { - if (type) - layout_info.bit_size = type->GetByteSize().getValueOr(0) * 8; - if (layout_info.bit_size == 0) - layout_info.bit_size = - die.GetAttributeValueAsUnsigned(DW_AT_byte_size, 0) * 8; - - clang::CXXRecordDecl *record_decl = - m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType()); - if (record_decl) { - if (log) { - ModuleSP module_sp = dwarf->GetObjectFile()->GetModule(); + // If we have a DW_TAG_structure_type instead of a DW_TAG_class_type we + // need to tell the clang type it is actually a class. + if (class_language != eLanguageTypeObjC) { + if (is_a_class && tag_decl_kind != clang::TTK_Class) + m_ast.SetTagTypeKind(ClangUtil::GetQualType(clang_type), + clang::TTK_Class); + } - if (module_sp) { - module_sp->LogMessage( - log, - "ClangASTContext::CompleteTypeFromDWARF (clang_type = %p) " - "caching layout info for record_decl = %p, bit_size = %" PRIu64 - ", alignment = %" PRIu64 - ", field_offsets[%u], base_offsets[%u], vbase_offsets[%u])", - static_cast(clang_type.GetOpaqueQualType()), - static_cast(record_decl), layout_info.bit_size, - layout_info.alignment, - static_cast(layout_info.field_offsets.size()), - static_cast(layout_info.base_offsets.size()), - static_cast(layout_info.vbase_offsets.size())); - - uint32_t idx; - { - llvm::DenseMap::const_iterator - pos, - end = layout_info.field_offsets.end(); - for (idx = 0, pos = layout_info.field_offsets.begin(); pos != end; - ++pos, ++idx) { - module_sp->LogMessage( - log, "ClangASTContext::CompleteTypeFromDWARF (clang_type = " - "%p) field[%u] = { bit_offset=%u, name='%s' }", - static_cast(clang_type.GetOpaqueQualType()), idx, - static_cast(pos->second), - pos->first->getNameAsString().c_str()); - } - } + // Since DW_TAG_structure_type gets used for both classes and + // structures, we may need to set any DW_TAG_member fields to have a + // "private" access if none was specified. When we parsed the child + // members we tracked that actual accessibility value for each + // DW_TAG_member in the "member_accessibilities" array. If the value + // for the member is zero, then it was set to the + // "default_accessibility" which for structs was "public". Below we + // correct this by setting any fields to "private" that weren't + // correctly set. 
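The default-access fixup described in the comment above exists because producers omit DW_AT_accessibility for members that have the default access, while DW_TAG_structure_type is emitted for both structs and classes. A small illustration of mine, not from the patch:

// Both of these can arrive as DW_TAG_structure_type with no
// DW_AT_accessibility on 'x', so once the parser decides the type is
// really a class it must re-apply 'private' to the unmarked members.
struct A { int x; }; // default access: public
class  B { int x; }; // default access: private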
+ if (is_a_class && !member_accessibilities.empty()) { + // This is a class and all members that didn't have their access + // specified are private. + m_ast.SetDefaultAccessForRecordFields( + m_ast.GetAsRecordDecl(clang_type), eAccessPrivate, + &member_accessibilities.front(), member_accessibilities.size()); + } - { - llvm::DenseMap::const_iterator base_pos, - base_end = layout_info.base_offsets.end(); - for (idx = 0, base_pos = layout_info.base_offsets.begin(); - base_pos != base_end; ++base_pos, ++idx) { - module_sp->LogMessage( - log, "ClangASTContext::CompleteTypeFromDWARF (clang_type = " - "%p) base[%u] = { byte_offset=%u, name='%s' }", - clang_type.GetOpaqueQualType(), idx, - (uint32_t)base_pos->second.getQuantity(), - base_pos->first->getNameAsString().c_str()); - } - } - { - llvm::DenseMap::const_iterator vbase_pos, - vbase_end = layout_info.vbase_offsets.end(); - for (idx = 0, vbase_pos = layout_info.vbase_offsets.begin(); - vbase_pos != vbase_end; ++vbase_pos, ++idx) { - module_sp->LogMessage( - log, "ClangASTContext::CompleteTypeFromDWARF (clang_type = " - "%p) vbase[%u] = { byte_offset=%u, name='%s' }", - static_cast(clang_type.GetOpaqueQualType()), idx, - static_cast(vbase_pos->second.getQuantity()), - vbase_pos->first->getNameAsString().c_str()); + if (!bases.empty()) { + // Make sure all base classes refer to complete types and not forward + // declarations. If we don't do this, clang will crash with an + // assertion in the call to clang_type.TransferBaseClasses() + for (const auto &base_class : bases) { + clang::TypeSourceInfo *type_source_info = + base_class->getTypeSourceInfo(); + if (type_source_info) { + CompilerType base_class_type( + &m_ast, type_source_info->getType().getAsOpaquePtr()); + if (!base_class_type.GetCompleteType()) { + auto module = dwarf->GetObjectFile()->GetModule(); + module->ReportError(":: Class '%s' has a base class '%s' which " + "does not have a complete definition.", + die.GetName(), + base_class_type.GetTypeName().GetCString()); + if (die.GetCU()->GetProducer() == eProducerClang) + module->ReportError(":: Try compiling the source file with " + "-fstandalone-debug."); + + // We have no choice other than to pretend that the base class + // is complete. If we don't do this, clang will crash when we + // call setBases() inside of + // "clang_type.TransferBaseClasses()" below. Since we + // provide layout assistance, all ivars in this class and other + // classes will be fine, this is the best we can do short of + // crashing. 
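The situation the forced completion below papers over is a DWARF-level one, so the sketch here is debug-info shaped rather than compilable C++; it is my illustration of the error messages above:

// Hypothetical debug info handled above (e.g. a -gmodules build):
//   DW_TAG_structure_type "Base"    DW_AT_declaration(true)  <- no members
//   DW_TAG_class_type     "Derived" with DW_TAG_inheritance -> "Base"
// Completing 'Derived' requires 'Base' to be a complete clang type.
// Rather than crash in setBases(), the parser force-completes an empty
// 'Base' and relies on the explicit layout info for field offsets.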
+            if (ClangASTContext::StartTagDeclarationDefinition(
+                    base_class_type)) {
+              ClangASTContext::CompleteTagDeclarationDefinition(
+                  base_class_type);
             }
           }
         }
       }
-      GetClangASTImporter().InsertRecordDecl(record_decl, layout_info);
+
+      m_ast.TransferBaseClasses(clang_type.GetOpaqueQualType(),
+                                std::move(bases));
     }
   }
 }
-  return (bool)clang_type;
+  m_ast.AddMethodOverridesForCXXRecordType(clang_type.GetOpaqueQualType());
+  ClangASTContext::BuildIndirectFields(clang_type);
+  ClangASTContext::CompleteTagDeclarationDefinition(clang_type);

-  case DW_TAG_enumeration_type:
-    if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) {
-      if (die.HasChildren()) {
-        bool is_signed = false;
-        clang_type.IsIntegerType(is_signed);
-        ParseChildEnumerators(clang_type, is_signed,
-                              type->GetByteSize().getValueOr(0), die);
-      }
-      ClangASTContext::CompleteTagDeclarationDefinition(clang_type);
+  if (!layout_info.field_offsets.empty() || !layout_info.base_offsets.empty() ||
+      !layout_info.vbase_offsets.empty()) {
+    if (type)
+      layout_info.bit_size = type->GetByteSize().getValueOr(0) * 8;
+    if (layout_info.bit_size == 0)
+      layout_info.bit_size =
+          die.GetAttributeValueAsUnsigned(DW_AT_byte_size, 0) * 8;
+
+    clang::CXXRecordDecl *record_decl =
+        m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType());
+    if (record_decl)
+      GetClangASTImporter().InsertRecordDecl(record_decl, layout_info);
+  }
+
+  return (bool)clang_type;
+}
+
+bool DWARFASTParserClang::CompleteEnumType(const DWARFDIE &die,
+                                           lldb_private::Type *type,
+                                           CompilerType &clang_type) {
+  if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) {
+    if (die.HasChildren()) {
+      bool is_signed = false;
+      clang_type.IsIntegerType(is_signed);
+      ParseChildEnumerators(clang_type, is_signed,
+                            type->GetByteSize().getValueOr(0), die);
     }
-    return (bool)clang_type;
+    ClangASTContext::CompleteTagDeclarationDefinition(clang_type);
+  }
+  return (bool)clang_type;
+}
+
+bool DWARFASTParserClang::CompleteTypeFromDWARF(const DWARFDIE &die,
+                                                lldb_private::Type *type,
+                                                CompilerType &clang_type) {
+  SymbolFileDWARF *dwarf = die.GetDWARF();
+
+  std::lock_guard<std::recursive_mutex> guard(
+      dwarf->GetObjectFile()->GetModule()->GetMutex());
+
+  // Disable external storage for this type so we don't get any more
+  // clang::ExternalASTSource queries for this type.
+ m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), false); + + if (!die) + return false; + + const dw_tag_t tag = die.Tag(); + Log *log = + nullptr; // (LogChannelDWARF::GetLogIfAny(DWARF_LOG_DEBUG_INFO|DWARF_LOG_TYPE_COMPLETION)); + if (log) + dwarf->GetObjectFile()->GetModule()->LogMessageVerboseBacktrace( + log, "0x%8.8" PRIx64 ": %s '%s' resolving forward declaration...", + die.GetID(), die.GetTagAsCString(), type->GetName().AsCString()); + assert(clang_type); + DWARFAttributes attributes; + switch (tag) { + case DW_TAG_structure_type: + case DW_TAG_union_type: + case DW_TAG_class_type: + return CompleteRecordType(die, type, clang_type); + case DW_TAG_enumeration_type: + return CompleteEnumType(die, type, clang_type); default: assert(false && "not a forward clang type decl!"); break; @@ -2495,495 +2448,500 @@ Function *DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit, return nullptr; } -bool DWARFASTParserClang::ParseChildMembers( - const DWARFDIE &parent_die, CompilerType &class_clang_type, - const LanguageType class_language, - std::vector> &base_classes, +void DWARFASTParserClang::ParseSingleMember( + const DWARFDIE &die, const DWARFDIE &parent_die, + lldb_private::CompilerType &class_clang_type, + const lldb::LanguageType class_language, std::vector &member_accessibilities, - std::vector &member_function_dies, - DelayedPropertyList &delayed_properties, AccessType &default_accessibility, - bool &is_a_class, ClangASTImporter::LayoutInfo &layout_info) { - if (!parent_die) - return false; - + lldb::AccessType &default_accessibility, + DelayedPropertyList &delayed_properties, + lldb_private::ClangASTImporter::LayoutInfo &layout_info, + BitfieldInfo &last_field_info) { + ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule(); + const dw_tag_t tag = die.Tag(); // Get the parent byte size so we can verify any members will fit const uint64_t parent_byte_size = parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size, UINT64_MAX); const uint64_t parent_bit_size = parent_byte_size == UINT64_MAX ? UINT64_MAX : parent_byte_size * 8; - uint32_t member_idx = 0; - BitfieldInfo last_field_info; - - ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule(); - ClangASTContext *ast = - llvm::dyn_cast_or_null(class_clang_type.GetTypeSystem()); - if (ast == nullptr) - return false; - - for (DWARFDIE die = parent_die.GetFirstChild(); die.IsValid(); - die = die.GetSibling()) { - dw_tag_t tag = die.Tag(); - - switch (tag) { - case DW_TAG_member: - case DW_TAG_APPLE_property: { - DWARFAttributes attributes; - const size_t num_attributes = die.GetAttributes(attributes); - if (num_attributes > 0) { - const char *name = nullptr; - const char *prop_name = nullptr; - const char *prop_getter_name = nullptr; - const char *prop_setter_name = nullptr; - uint32_t prop_attributes = 0; - - bool is_artificial = false; - DWARFFormValue encoding_form; - AccessType accessibility = eAccessNone; - uint32_t member_byte_offset = - (parent_die.Tag() == DW_TAG_union_type) ? 
0 : UINT32_MAX; - llvm::Optional byte_size; - int64_t bit_offset = 0; - uint64_t data_bit_offset = UINT64_MAX; - size_t bit_size = 0; - bool is_external = - false; // On DW_TAG_members, this means the member is static - uint32_t i; - for (i = 0; i < num_attributes && !is_artificial; ++i) { - const dw_attr_t attr = attributes.AttributeAtIndex(i); - DWARFFormValue form_value; - if (attributes.ExtractFormValueAtIndex(i, form_value)) { - switch (attr) { - case DW_AT_name: - name = form_value.AsCString(); - break; - case DW_AT_type: - encoding_form = form_value; - break; - case DW_AT_bit_offset: - bit_offset = form_value.Signed(); - break; - case DW_AT_bit_size: - bit_size = form_value.Unsigned(); - break; - case DW_AT_byte_size: - byte_size = form_value.Unsigned(); - break; - case DW_AT_data_bit_offset: - data_bit_offset = form_value.Unsigned(); - break; - case DW_AT_data_member_location: - if (form_value.BlockData()) { - Value initialValue(0); - Value memberOffset(0); - const DWARFDataExtractor &debug_info_data = die.GetData(); - uint32_t block_length = form_value.Unsigned(); - uint32_t block_offset = - form_value.BlockData() - debug_info_data.GetDataStart(); - if (DWARFExpression::Evaluate( - nullptr, // ExecutionContext * - nullptr, // RegisterContext * - module_sp, - DataExtractor(debug_info_data, block_offset, - block_length), - die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, - memberOffset, nullptr)) { - member_byte_offset = - memberOffset.ResolveValue(nullptr).UInt(); - } - } else { - // With DWARF 3 and later, if the value is an integer constant, - // this form value is the offset in bytes from the beginning of - // the containing entity. - member_byte_offset = form_value.Unsigned(); - } - break; - - case DW_AT_accessibility: - accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned()); - break; - case DW_AT_artificial: - is_artificial = form_value.Boolean(); - break; - case DW_AT_APPLE_property_name: - prop_name = form_value.AsCString(); - break; - case DW_AT_APPLE_property_getter: - prop_getter_name = form_value.AsCString(); - break; - case DW_AT_APPLE_property_setter: - prop_setter_name = form_value.AsCString(); - break; - case DW_AT_APPLE_property_attribute: - prop_attributes = form_value.Unsigned(); - break; - case DW_AT_external: - is_external = form_value.Boolean(); - break; - - default: - case DW_AT_declaration: - case DW_AT_description: - case DW_AT_mutable: - case DW_AT_visibility: - case DW_AT_sibling: - break; + DWARFAttributes attributes; + const size_t num_attributes = die.GetAttributes(attributes); + if (num_attributes > 0) { + const char *name = nullptr; + const char *prop_name = nullptr; + const char *prop_getter_name = nullptr; + const char *prop_setter_name = nullptr; + uint32_t prop_attributes = 0; + + bool is_artificial = false; + DWARFFormValue encoding_form; + AccessType accessibility = eAccessNone; + uint32_t member_byte_offset = + (parent_die.Tag() == DW_TAG_union_type) ? 
0 : UINT32_MAX; + llvm::Optional byte_size; + int64_t bit_offset = 0; + uint64_t data_bit_offset = UINT64_MAX; + size_t bit_size = 0; + bool is_external = + false; // On DW_TAG_members, this means the member is static + uint32_t i; + for (i = 0; i < num_attributes && !is_artificial; ++i) { + const dw_attr_t attr = attributes.AttributeAtIndex(i); + DWARFFormValue form_value; + if (attributes.ExtractFormValueAtIndex(i, form_value)) { + switch (attr) { + case DW_AT_name: + name = form_value.AsCString(); + break; + case DW_AT_type: + encoding_form = form_value; + break; + case DW_AT_bit_offset: + bit_offset = form_value.Signed(); + break; + case DW_AT_bit_size: + bit_size = form_value.Unsigned(); + break; + case DW_AT_byte_size: + byte_size = form_value.Unsigned(); + break; + case DW_AT_data_bit_offset: + data_bit_offset = form_value.Unsigned(); + break; + case DW_AT_data_member_location: + if (form_value.BlockData()) { + Value initialValue(0); + Value memberOffset(0); + const DWARFDataExtractor &debug_info_data = die.GetData(); + uint32_t block_length = form_value.Unsigned(); + uint32_t block_offset = + form_value.BlockData() - debug_info_data.GetDataStart(); + if (DWARFExpression::Evaluate( + nullptr, // ExecutionContext * + nullptr, // RegisterContext * + module_sp, + DataExtractor(debug_info_data, block_offset, block_length), + die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr, + memberOffset, nullptr)) { + member_byte_offset = memberOffset.ResolveValue(nullptr).UInt(); } + } else { + // With DWARF 3 and later, if the value is an integer constant, + // this form value is the offset in bytes from the beginning of + // the containing entity. + member_byte_offset = form_value.Unsigned(); } - } - - if (prop_name) { - ConstString fixed_getter; - ConstString fixed_setter; - - // Check if the property getter/setter were provided as full names. - // We want basenames, so we extract them. - - if (prop_getter_name && prop_getter_name[0] == '-') { - ObjCLanguage::MethodName prop_getter_method(prop_getter_name, true); - prop_getter_name = prop_getter_method.GetSelector().GetCString(); - } + break; - if (prop_setter_name && prop_setter_name[0] == '-') { - ObjCLanguage::MethodName prop_setter_method(prop_setter_name, true); - prop_setter_name = prop_setter_method.GetSelector().GetCString(); - } + case DW_AT_accessibility: + accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned()); + break; + case DW_AT_artificial: + is_artificial = form_value.Boolean(); + break; + case DW_AT_APPLE_property_name: + prop_name = form_value.AsCString(); + break; + case DW_AT_APPLE_property_getter: + prop_getter_name = form_value.AsCString(); + break; + case DW_AT_APPLE_property_setter: + prop_setter_name = form_value.AsCString(); + break; + case DW_AT_APPLE_property_attribute: + prop_attributes = form_value.Unsigned(); + break; + case DW_AT_external: + is_external = form_value.Boolean(); + break; - // If the names haven't been provided, they need to be filled in. + default: + case DW_AT_declaration: + case DW_AT_description: + case DW_AT_mutable: + case DW_AT_visibility: + case DW_AT_sibling: + break; + } + } + } - if (!prop_getter_name) { - prop_getter_name = prop_name; - } - if (!prop_setter_name && prop_name[0] && - !(prop_attributes & DW_APPLE_PROPERTY_readonly)) { - StreamString ss; + if (prop_name) { + ConstString fixed_getter; + ConstString fixed_setter; - ss.Printf("set%c%s:", toupper(prop_name[0]), &prop_name[1]); + // Check if the property getter/setter were provided as full names. 
+ // We want basenames, so we extract them. - fixed_setter.SetString(ss.GetString()); - prop_setter_name = fixed_setter.GetCString(); - } - } + if (prop_getter_name && prop_getter_name[0] == '-') { + ObjCLanguage::MethodName prop_getter_method(prop_getter_name, true); + prop_getter_name = prop_getter_method.GetSelector().GetCString(); + } - // Clang has a DWARF generation bug where sometimes it represents - // fields that are references with bad byte size and bit size/offset - // information such as: - // - // DW_AT_byte_size( 0x00 ) - // DW_AT_bit_size( 0x40 ) - // DW_AT_bit_offset( 0xffffffffffffffc0 ) - // - // So check the bit offset to make sure it is sane, and if the values - // are not sane, remove them. If we don't do this then we will end up - // with a crash if we try to use this type in an expression when clang - // becomes unhappy with its recycled debug info. - - if (byte_size.getValueOr(0) == 0 && bit_offset < 0) { - bit_size = 0; - bit_offset = 0; - } + if (prop_setter_name && prop_setter_name[0] == '-') { + ObjCLanguage::MethodName prop_setter_method(prop_setter_name, true); + prop_setter_name = prop_setter_method.GetSelector().GetCString(); + } - // FIXME: Make Clang ignore Objective-C accessibility for expressions - if (class_language == eLanguageTypeObjC || - class_language == eLanguageTypeObjC_plus_plus) - accessibility = eAccessNone; - - // Handle static members - if (is_external && member_byte_offset == UINT32_MAX) { - Type *var_type = die.ResolveTypeUID(encoding_form.Reference()); - - if (var_type) { - if (accessibility == eAccessNone) - accessibility = eAccessPublic; - ClangASTContext::AddVariableToRecordType( - class_clang_type, name, var_type->GetLayoutCompilerType(), - accessibility); - } - break; - } + // If the names haven't been provided, they need to be filled in. - if (!is_artificial) { - Type *member_type = die.ResolveTypeUID(encoding_form.Reference()); - - clang::FieldDecl *field_decl = nullptr; - if (tag == DW_TAG_member) { - if (member_type) { - if (accessibility == eAccessNone) - accessibility = default_accessibility; - member_accessibilities.push_back(accessibility); - - uint64_t field_bit_offset = - (member_byte_offset == UINT32_MAX ? 0 - : (member_byte_offset * 8)); - if (bit_size > 0) { - - BitfieldInfo this_field_info; - this_field_info.bit_offset = field_bit_offset; - this_field_info.bit_size = bit_size; - - ///////////////////////////////////////////////////////////// - // How to locate a field given the DWARF debug information - // - // AT_byte_size indicates the size of the word in which the bit - // offset must be interpreted. - // - // AT_data_member_location indicates the byte offset of the - // word from the base address of the structure. - // - // AT_bit_offset indicates how many bits into the word - // (according to the host endianness) the low-order bit of the - // field starts. AT_bit_offset can be negative. - // - // AT_bit_size indicates the size of the field in bits. 
- ///////////////////////////////////////////////////////////// - - if (data_bit_offset != UINT64_MAX) { - this_field_info.bit_offset = data_bit_offset; - } else { - if (!byte_size) - byte_size = member_type->GetByteSize(); - - ObjectFile *objfile = die.GetDWARF()->GetObjectFile(); - if (objfile->GetByteOrder() == eByteOrderLittle) { - this_field_info.bit_offset += byte_size.getValueOr(0) * 8; - this_field_info.bit_offset -= (bit_offset + bit_size); - } else { - this_field_info.bit_offset += bit_offset; - } - } + if (!prop_getter_name) { + prop_getter_name = prop_name; + } + if (!prop_setter_name && prop_name[0] && + !(prop_attributes & DW_APPLE_PROPERTY_readonly)) { + StreamString ss; - if ((this_field_info.bit_offset >= parent_bit_size) || - !last_field_info.NextBitfieldOffsetIsValid( - this_field_info.bit_offset)) { - ObjectFile *objfile = die.GetDWARF()->GetObjectFile(); - objfile->GetModule()->ReportWarning( - "0x%8.8" PRIx64 ": %s bitfield named \"%s\" has invalid " - "bit offset (0x%8.8" PRIx64 - ") member will be ignored. Please file a bug against the " - "compiler and include the preprocessed output for %s\n", - die.GetID(), DW_TAG_value_to_name(tag), name, - this_field_info.bit_offset, - GetUnitName(parent_die).c_str()); - this_field_info.Clear(); - continue; - } + ss.Printf("set%c%s:", toupper(prop_name[0]), &prop_name[1]); - // Update the field bit offset we will report for layout - field_bit_offset = this_field_info.bit_offset; - - // If the member to be emitted did not start on a character - // boundary and there is empty space between the last field and - // this one, then we need to emit an anonymous member filling - // up the space up to its start. There are three cases here: - // - // 1 If the previous member ended on a character boundary, then - // we can emit an - // anonymous member starting at the most recent character - // boundary. - // - // 2 If the previous member did not end on a character boundary - // and the distance - // from the end of the previous member to the current member - // is less than a - // word width, then we can emit an anonymous member starting - // right after the - // previous member and right before this member. - // - // 3 If the previous member did not end on a character boundary - // and the distance - // from the end of the previous member to the current member - // is greater than - // or equal a word width, then we act as in Case 1. - - const uint64_t character_width = 8; - const uint64_t word_width = 32; - - // Objective-C has invalid DW_AT_bit_offset values in older - // versions of clang, so we have to be careful and only insert - // unnamed bitfields if we have a new enough clang. 
- bool detect_unnamed_bitfields = true; - - if (class_language == eLanguageTypeObjC || - class_language == eLanguageTypeObjC_plus_plus) - detect_unnamed_bitfields = - die.GetCU()->Supports_unnamed_objc_bitfields(); - - if (detect_unnamed_bitfields) { - BitfieldInfo anon_field_info; - - if ((this_field_info.bit_offset % character_width) != - 0) // not char aligned - { - uint64_t last_field_end = 0; - - if (last_field_info.IsValid()) - last_field_end = - last_field_info.bit_offset + last_field_info.bit_size; - - if (this_field_info.bit_offset != last_field_end) { - if (((last_field_end % character_width) == 0) || // case 1 - (this_field_info.bit_offset - last_field_end >= - word_width)) // case 3 - { - anon_field_info.bit_size = - this_field_info.bit_offset % character_width; - anon_field_info.bit_offset = - this_field_info.bit_offset - - anon_field_info.bit_size; - } else // case 2 - { - anon_field_info.bit_size = - this_field_info.bit_offset - last_field_end; - anon_field_info.bit_offset = last_field_end; - } - } - } + fixed_setter.SetString(ss.GetString()); + prop_setter_name = fixed_setter.GetCString(); + } + } - if (anon_field_info.IsValid()) { - clang::FieldDecl *unnamed_bitfield_decl = - ClangASTContext::AddFieldToRecordType( - class_clang_type, llvm::StringRef(), - m_ast.GetBuiltinTypeForEncodingAndBitSize( - eEncodingSint, word_width), - accessibility, anon_field_info.bit_size); + // Clang has a DWARF generation bug where sometimes it represents + // fields that are references with bad byte size and bit size/offset + // information such as: + // + // DW_AT_byte_size( 0x00 ) + // DW_AT_bit_size( 0x40 ) + // DW_AT_bit_offset( 0xffffffffffffffc0 ) + // + // So check the bit offset to make sure it is sane, and if the values + // are not sane, remove them. If we don't do this then we will end up + // with a crash if we try to use this type in an expression when clang + // becomes unhappy with its recycled debug info. + + if (byte_size.getValueOr(0) == 0 && bit_offset < 0) { + bit_size = 0; + bit_offset = 0; + } + + // FIXME: Make Clang ignore Objective-C accessibility for expressions + if (class_language == eLanguageTypeObjC || + class_language == eLanguageTypeObjC_plus_plus) + accessibility = eAccessNone; + + // Handle static members + if (is_external && member_byte_offset == UINT32_MAX) { + Type *var_type = die.ResolveTypeUID(encoding_form.Reference()); + + if (var_type) { + if (accessibility == eAccessNone) + accessibility = eAccessPublic; + ClangASTContext::AddVariableToRecordType( + class_clang_type, name, var_type->GetLayoutCompilerType(), + accessibility); + } + return; + } + + if (!is_artificial) { + Type *member_type = die.ResolveTypeUID(encoding_form.Reference()); + + clang::FieldDecl *field_decl = nullptr; + if (tag == DW_TAG_member) { + if (member_type) { + if (accessibility == eAccessNone) + accessibility = default_accessibility; + member_accessibilities.push_back(accessibility); + + uint64_t field_bit_offset = + (member_byte_offset == UINT32_MAX ? 0 : (member_byte_offset * 8)); + if (bit_size > 0) { + + BitfieldInfo this_field_info; + this_field_info.bit_offset = field_bit_offset; + this_field_info.bit_size = bit_size; + + ///////////////////////////////////////////////////////////// + // How to locate a field given the DWARF debug information + // + // AT_byte_size indicates the size of the word in which the bit + // offset must be interpreted. + // + // AT_data_member_location indicates the byte offset of the + // word from the base address of the structure. 
+ // + // AT_bit_offset indicates how many bits into the word + // (according to the host endianness) the low-order bit of the + // field starts. AT_bit_offset can be negative. + // + // AT_bit_size indicates the size of the field in bits. + ///////////////////////////////////////////////////////////// + + if (data_bit_offset != UINT64_MAX) { + this_field_info.bit_offset = data_bit_offset; + } else { + if (!byte_size) + byte_size = member_type->GetByteSize(); - layout_info.field_offsets.insert(std::make_pair( - unnamed_bitfield_decl, anon_field_info.bit_offset)); - } - } - last_field_info = this_field_info; + ObjectFile *objfile = die.GetDWARF()->GetObjectFile(); + if (objfile->GetByteOrder() == eByteOrderLittle) { + this_field_info.bit_offset += byte_size.getValueOr(0) * 8; + this_field_info.bit_offset -= (bit_offset + bit_size); } else { - last_field_info.Clear(); + this_field_info.bit_offset += bit_offset; } + } - CompilerType member_clang_type = - member_type->GetLayoutCompilerType(); - if (!member_clang_type.IsCompleteType()) - member_clang_type.GetCompleteType(); + if ((this_field_info.bit_offset >= parent_bit_size) || + !last_field_info.NextBitfieldOffsetIsValid( + this_field_info.bit_offset)) { + ObjectFile *objfile = die.GetDWARF()->GetObjectFile(); + objfile->GetModule()->ReportWarning( + "0x%8.8" PRIx64 ": %s bitfield named \"%s\" has invalid " + "bit offset (0x%8.8" PRIx64 + ") member will be ignored. Please file a bug against the " + "compiler and include the preprocessed output for %s\n", + die.GetID(), DW_TAG_value_to_name(tag), name, + this_field_info.bit_offset, GetUnitName(parent_die).c_str()); + this_field_info.Clear(); + return; + } + // Update the field bit offset we will report for layout + field_bit_offset = this_field_info.bit_offset; + + // If the member to be emitted did not start on a character + // boundary and there is empty space between the last field and + // this one, then we need to emit an anonymous member filling + // up the space up to its start. There are three cases here: + // + // 1 If the previous member ended on a character boundary, then + // we can emit an + // anonymous member starting at the most recent character + // boundary. + // + // 2 If the previous member did not end on a character boundary + // and the distance + // from the end of the previous member to the current member + // is less than a + // word width, then we can emit an anonymous member starting + // right after the + // previous member and right before this member. + // + // 3 If the previous member did not end on a character boundary + // and the distance + // from the end of the previous member to the current member + // is greater than + // or equal a word width, then we act as in Case 1. + + const uint64_t character_width = 8; + const uint64_t word_width = 32; + + // Objective-C has invalid DW_AT_bit_offset values in older + // versions of clang, so we have to be careful and only insert + // unnamed bitfields if we have a new enough clang. + bool detect_unnamed_bitfields = true; + + if (class_language == eLanguageTypeObjC || + class_language == eLanguageTypeObjC_plus_plus) + detect_unnamed_bitfields = + die.GetCU()->Supports_unnamed_objc_bitfields(); + + if (detect_unnamed_bitfields) { + BitfieldInfo anon_field_info; + + if ((this_field_info.bit_offset % character_width) != + 0) // not char aligned { - // Older versions of clang emit array[0] and array[1] in the - // same way (). 
If the current field - // is at the end of the structure, then there is definitely no - // room for extra elements and we override the type to - // array[0]. - - CompilerType member_array_element_type; - uint64_t member_array_size; - bool member_array_is_incomplete; - - if (member_clang_type.IsArrayType( - &member_array_element_type, &member_array_size, - &member_array_is_incomplete) && - !member_array_is_incomplete) { - uint64_t parent_byte_size = - parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size, - UINT64_MAX); - - if (member_byte_offset >= parent_byte_size) { - if (member_array_size != 1 && - (member_array_size != 0 || - member_byte_offset > parent_byte_size)) { - module_sp->ReportError( - "0x%8.8" PRIx64 - ": DW_TAG_member '%s' refers to type 0x%8.8x" - " which extends beyond the bounds of 0x%8.8" PRIx64, - die.GetID(), name, - encoding_form.Reference().GetOffset(), - parent_die.GetID()); - } + uint64_t last_field_end = 0; - member_clang_type = m_ast.CreateArrayType( - member_array_element_type, 0, false); + if (last_field_info.IsValid()) + last_field_end = + last_field_info.bit_offset + last_field_info.bit_size; + + if (this_field_info.bit_offset != last_field_end) { + if (((last_field_end % character_width) == 0) || // case 1 + (this_field_info.bit_offset - last_field_end >= + word_width)) // case 3 + { + anon_field_info.bit_size = + this_field_info.bit_offset % character_width; + anon_field_info.bit_offset = + this_field_info.bit_offset - anon_field_info.bit_size; + } else // case 2 + { + anon_field_info.bit_size = + this_field_info.bit_offset - last_field_end; + anon_field_info.bit_offset = last_field_end; } } } - if (ClangASTContext::IsCXXClassType(member_clang_type) && - !member_clang_type.GetCompleteType()) { - if (die.GetCU()->GetProducer() == eProducerClang) - module_sp->ReportError( - "DWARF DIE at 0x%8.8x (class %s) has a member variable " - "0x%8.8x (%s) whose type is a forward declaration, not a " - "complete definition.\nTry compiling the source file " - "with -fstandalone-debug", - parent_die.GetOffset(), parent_die.GetName(), - die.GetOffset(), name); - else - module_sp->ReportError( - "DWARF DIE at 0x%8.8x (class %s) has a member variable " - "0x%8.8x (%s) whose type is a forward declaration, not a " - "complete definition.\nPlease file a bug against the " - "compiler and include the preprocessed output for %s", - parent_die.GetOffset(), parent_die.GetName(), - die.GetOffset(), name, GetUnitName(parent_die).c_str()); - // We have no choice other than to pretend that the member - // class is complete. If we don't do this, clang will crash - // when trying to layout the class. Since we provide layout - // assistance, all ivars in this class and other classes will - // be fine, this is the best we can do short of crashing. 
- if (ClangASTContext::StartTagDeclarationDefinition( - member_clang_type)) { - ClangASTContext::CompleteTagDeclarationDefinition( - member_clang_type); - } else { - module_sp->ReportError( - "DWARF DIE at 0x%8.8x (class %s) has a member variable " - "0x%8.8x (%s) whose type claims to be a C++ class but we " - "were not able to start its definition.\nPlease file a " - "bug and attach the file at the start of this error " - "message", - parent_die.GetOffset(), parent_die.GetName(), - die.GetOffset(), name); - } + if (anon_field_info.IsValid()) { + clang::FieldDecl *unnamed_bitfield_decl = + ClangASTContext::AddFieldToRecordType( + class_clang_type, llvm::StringRef(), + m_ast.GetBuiltinTypeForEncodingAndBitSize(eEncodingSint, + word_width), + accessibility, anon_field_info.bit_size); + + layout_info.field_offsets.insert(std::make_pair( + unnamed_bitfield_decl, anon_field_info.bit_offset)); } + } + last_field_info = this_field_info; + } else { + last_field_info.Clear(); + } - field_decl = ClangASTContext::AddFieldToRecordType( - class_clang_type, name, member_clang_type, accessibility, - bit_size); + CompilerType member_clang_type = member_type->GetLayoutCompilerType(); + if (!member_clang_type.IsCompleteType()) + member_clang_type.GetCompleteType(); + + { + // Older versions of clang emit array[0] and array[1] in the + // same way (). If the current field + // is at the end of the structure, then there is definitely no + // room for extra elements and we override the type to + // array[0]. + + CompilerType member_array_element_type; + uint64_t member_array_size; + bool member_array_is_incomplete; + + if (member_clang_type.IsArrayType(&member_array_element_type, + &member_array_size, + &member_array_is_incomplete) && + !member_array_is_incomplete) { + uint64_t parent_byte_size = + parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size, + UINT64_MAX); + + if (member_byte_offset >= parent_byte_size) { + if (member_array_size != 1 && + (member_array_size != 0 || + member_byte_offset > parent_byte_size)) { + module_sp->ReportError( + "0x%8.8" PRIx64 + ": DW_TAG_member '%s' refers to type 0x%8.8x" + " which extends beyond the bounds of 0x%8.8" PRIx64, + die.GetID(), name, encoding_form.Reference().GetOffset(), + parent_die.GetID()); + } - m_ast.SetMetadataAsUserID(field_decl, die.GetID()); + member_clang_type = + m_ast.CreateArrayType(member_array_element_type, 0, false); + } + } + } - layout_info.field_offsets.insert( - std::make_pair(field_decl, field_bit_offset)); + if (ClangASTContext::IsCXXClassType(member_clang_type) && + !member_clang_type.GetCompleteType()) { + if (die.GetCU()->GetProducer() == eProducerClang) + module_sp->ReportError( + "DWARF DIE at 0x%8.8x (class %s) has a member variable " + "0x%8.8x (%s) whose type is a forward declaration, not a " + "complete definition.\nTry compiling the source file " + "with -fstandalone-debug", + parent_die.GetOffset(), parent_die.GetName(), die.GetOffset(), + name); + else + module_sp->ReportError( + "DWARF DIE at 0x%8.8x (class %s) has a member variable " + "0x%8.8x (%s) whose type is a forward declaration, not a " + "complete definition.\nPlease file a bug against the " + "compiler and include the preprocessed output for %s", + parent_die.GetOffset(), parent_die.GetName(), die.GetOffset(), + name, GetUnitName(parent_die).c_str()); + // We have no choice other than to pretend that the member + // class is complete. If we don't do this, clang will crash + // when trying to layout the class. 
Since we provide layout + // assistance, all ivars in this class and other classes will + // be fine, this is the best we can do short of crashing. + if (ClangASTContext::StartTagDeclarationDefinition( + member_clang_type)) { + ClangASTContext::CompleteTagDeclarationDefinition( + member_clang_type); } else { - if (name) - module_sp->ReportError( - "0x%8.8" PRIx64 - ": DW_TAG_member '%s' refers to type 0x%8.8x" - " which was unable to be parsed", - die.GetID(), name, encoding_form.Reference().GetOffset()); - else - module_sp->ReportError( - "0x%8.8" PRIx64 ": DW_TAG_member refers to type 0x%8.8x" - " which was unable to be parsed", - die.GetID(), encoding_form.Reference().GetOffset()); + module_sp->ReportError( + "DWARF DIE at 0x%8.8x (class %s) has a member variable " + "0x%8.8x (%s) whose type claims to be a C++ class but we " + "were not able to start its definition.\nPlease file a " + "bug and attach the file at the start of this error " + "message", + parent_die.GetOffset(), parent_die.GetName(), die.GetOffset(), + name); } } - if (prop_name != nullptr && member_type) { - clang::ObjCIvarDecl *ivar_decl = nullptr; + field_decl = ClangASTContext::AddFieldToRecordType( + class_clang_type, name, member_clang_type, accessibility, + bit_size); - if (field_decl) { - ivar_decl = clang::dyn_cast(field_decl); - assert(ivar_decl != nullptr); - } + m_ast.SetMetadataAsUserID(field_decl, die.GetID()); + + layout_info.field_offsets.insert( + std::make_pair(field_decl, field_bit_offset)); + } else { + if (name) + module_sp->ReportError( + "0x%8.8" PRIx64 ": DW_TAG_member '%s' refers to type 0x%8.8x" + " which was unable to be parsed", + die.GetID(), name, encoding_form.Reference().GetOffset()); + else + module_sp->ReportError( + "0x%8.8" PRIx64 ": DW_TAG_member refers to type 0x%8.8x" + " which was unable to be parsed", + die.GetID(), encoding_form.Reference().GetOffset()); + } + } - ClangASTMetadata metadata; - metadata.SetUserID(die.GetID()); - delayed_properties.push_back(DelayedAddObjCClassProperty( - class_clang_type, prop_name, - member_type->GetLayoutCompilerType(), ivar_decl, - prop_setter_name, prop_getter_name, prop_attributes, - &metadata)); + if (prop_name != nullptr && member_type) { + clang::ObjCIvarDecl *ivar_decl = nullptr; - if (ivar_decl) - m_ast.SetMetadataAsUserID(ivar_decl, die.GetID()); - } + if (field_decl) { + ivar_decl = clang::dyn_cast(field_decl); + assert(ivar_decl != nullptr); } + + ClangASTMetadata metadata; + metadata.SetUserID(die.GetID()); + delayed_properties.push_back(DelayedAddObjCClassProperty( + class_clang_type, prop_name, member_type->GetLayoutCompilerType(), + ivar_decl, prop_setter_name, prop_getter_name, prop_attributes, + &metadata)); + + if (ivar_decl) + m_ast.SetMetadataAsUserID(ivar_decl, die.GetID()); } - ++member_idx; - } break; + } + } +} + +bool DWARFASTParserClang::ParseChildMembers( + const DWARFDIE &parent_die, CompilerType &class_clang_type, + const LanguageType class_language, + std::vector> &base_classes, + std::vector &member_accessibilities, + std::vector &member_function_dies, + DelayedPropertyList &delayed_properties, AccessType &default_accessibility, + bool &is_a_class, ClangASTImporter::LayoutInfo &layout_info) { + if (!parent_die) + return false; + + BitfieldInfo last_field_info; + + ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule(); + ClangASTContext *ast = + llvm::dyn_cast_or_null(class_clang_type.GetTypeSystem()); + if (ast == nullptr) + return false; + + for (DWARFDIE die = parent_die.GetFirstChild(); 
die.IsValid();
+       die = die.GetSibling()) {
+    dw_tag_t tag = die.Tag();
+
+    switch (tag) {
+    case DW_TAG_member:
+    case DW_TAG_APPLE_property:
+      ParseSingleMember(die, parent_die, class_clang_type, class_language,
+                        member_accessibilities, default_accessibility,
+                        delayed_properties, layout_info, last_field_info);
+      break;

     case DW_TAG_subprogram:
       // Let the type parsing code handle this one for us.
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
index 982a089981d4e..c5b630e435e94 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
@@ -168,6 +168,65 @@ class DWARFASTParserClang : public DWARFASTParser {
   // Return true if this type is a declaration to a type in an external
   // module.
   lldb::ModuleSP GetModuleForType(const DWARFDIE &die);
+
+private:
+  struct BitfieldInfo {
+    uint64_t bit_size;
+    uint64_t bit_offset;
+
+    BitfieldInfo()
+        : bit_size(LLDB_INVALID_ADDRESS), bit_offset(LLDB_INVALID_ADDRESS) {}
+
+    void Clear() {
+      bit_size = LLDB_INVALID_ADDRESS;
+      bit_offset = LLDB_INVALID_ADDRESS;
+    }
+
+    bool IsValid() const {
+      return (bit_size != LLDB_INVALID_ADDRESS) &&
+             (bit_offset != LLDB_INVALID_ADDRESS);
+    }
+
+    bool NextBitfieldOffsetIsValid(const uint64_t next_bit_offset) const {
+      if (IsValid()) {
+        // This bitfield info is valid, so any subsequent bitfields must not
+        // overlap and must be at a higher bit offset than any previous
+        // bitfield + size.
+        return (bit_size + bit_offset) <= next_bit_offset;
+      } else {
+        // If this BitfieldInfo is not valid, then any offset is OK.
+        return true;
+      }
+    }
+  };
+
+  void
+  ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die,
+                    lldb_private::CompilerType &class_clang_type,
+                    const lldb::LanguageType class_language,
+                    std::vector<int> &member_accessibilities,
+                    lldb::AccessType &default_accessibility,
+                    DelayedPropertyList &delayed_properties,
+                    lldb_private::ClangASTImporter::LayoutInfo &layout_info,
+                    BitfieldInfo &last_field_info);
+
+  bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type,
+                          lldb_private::CompilerType &clang_type);
+  bool CompleteEnumType(const DWARFDIE &die, lldb_private::Type *type,
+                        lldb_private::CompilerType &clang_type);
+
+  lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc,
+                                 const DWARFDIE &die,
+                                 ParsedDWARFTypeAttributes &attrs);
+  lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc,
+                         const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs);
+  lldb::TypeSP ParseSubroutine(const DWARFDIE &die,
+                               ParsedDWARFTypeAttributes &attrs);
+  // FIXME: attrs should be passed as a const reference.
+  lldb::TypeSP ParseArrayType(const DWARFDIE &die,
+                              ParsedDWARFTypeAttributes &attrs);
+  lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die,
+                                        const ParsedDWARFTypeAttributes &attrs);
 };

 /// Parsed form of all attributes that are relevant for type reconstruction.
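The new BitfieldInfo bookkeeping is easiest to see in isolation. A simplified standalone model of the invariant (Bitfield and NextOffsetIsValid are renamed stand-ins, and UINT64_MAX stands in for LLDB_INVALID_ADDRESS; this is a sketch, not the class above):

#include <cassert>
#include <cstdint>

// A later bitfield must start at or after the end (offset + size) of the
// previously parsed bitfield; anything else means the debug info is bogus.
struct Bitfield {
  uint64_t bit_size = UINT64_MAX;   // UINT64_MAX marks "no previous field"
  uint64_t bit_offset = UINT64_MAX;
  bool IsValid() const {
    return bit_size != UINT64_MAX && bit_offset != UINT64_MAX;
  }
  bool NextOffsetIsValid(uint64_t next) const {
    return !IsValid() || bit_size + bit_offset <= next;
  }
};

int main() {
  Bitfield last;                       // no previous field yet
  assert(last.NextOffsetIsValid(0));   // any offset is accepted
  last = {4, 0};                       // 4-bit field occupying bits [0, 4)
  assert(last.NextOffsetIsValid(4));   // abuts the previous field: fine
  assert(!last.NextOffsetIsValid(2));  // overlaps: must be rejected
}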
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp index eb307ce1cce1b..db8d7b3747ecd 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp @@ -81,7 +81,8 @@ const DWARFDataExtractor &DWARFContext::getOrLoadRangesData() { } const DWARFDataExtractor &DWARFContext::getOrLoadRngListsData() { - return LoadOrGetSection(eSectionTypeDWARFDebugRngLists, llvm::None, + return LoadOrGetSection(eSectionTypeDWARFDebugRngLists, + eSectionTypeDWARFDebugRngListsDwo, m_data_debug_rnglists); } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h index d1b066ffe80cb..056cf33a202f1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h @@ -16,7 +16,6 @@ #include "DWARFTypeUnit.h" #include "DWARFUnit.h" #include "SymbolFileDWARF.h" -#include "lldb/Core/STLUtils.h" #include "lldb/lldb-private.h" #include "llvm/Support/Error.h" @@ -24,11 +23,6 @@ namespace lldb_private { class DWARFContext; } -typedef std::multimap - CStringToDIEMap; -typedef CStringToDIEMap::iterator CStringToDIEMapIter; -typedef CStringToDIEMap::const_iterator CStringToDIEMapConstIter; - class DWARFDebugInfo { public: typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index 8c0fbeb4b717b..1bab4e9db6343 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -200,7 +200,7 @@ bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &data, return false; } -static DWARFRangeList GetRangesOrReportError(const DWARFUnit &unit, +static DWARFRangeList GetRangesOrReportError(DWARFUnit &unit, const DWARFDebugInfoEntry &die, const DWARFFormValue &value) { llvm::Expected expected_ranges = @@ -223,7 +223,7 @@ static DWARFRangeList GetRangesOrReportError(const DWARFUnit &unit, // Gets the valid address ranges for a given DIE by looking for a // DW_AT_low_pc/DW_AT_high_pc pair, DW_AT_entry_pc, or DW_AT_ranges attributes. 
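For context on the DW_AT_low_pc/DW_AT_high_pc pair mentioned just above: since DWARF v4, DW_AT_high_pc may be encoded either as an address or as a constant, and in the constant case its value is an offset from DW_AT_low_pc rather than an absolute end address. A minimal sketch of that general DWARF rule (GetHighPC is a hypothetical helper, not code from this patch):

#include <cstdint>

// Decode the end address of a DIE's [low_pc, high_pc) range. When high_pc
// uses a constant-class form (e.g. DW_FORM_data4) it is relative to low_pc;
// when it uses an address form it is already absolute.
static uint64_t GetHighPC(uint64_t low_pc, uint64_t high_pc_value,
                          bool high_pc_is_constant_form) {
  return high_pc_is_constant_form ? low_pc + high_pc_value : high_pc_value;
}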
bool DWARFDebugInfoEntry::GetDIENamesAndRanges( - const DWARFUnit *cu, const char *&name, const char *&mangled, + DWARFUnit *cu, const char *&name, const char *&mangled, DWARFRangeList &ranges, int &decl_file, int &decl_line, int &decl_column, int &call_file, int &call_line, int &call_column, DWARFExpression *frame_base) const { @@ -766,7 +766,7 @@ bool DWARFDebugInfoEntry::GetAttributeAddressRange( } size_t DWARFDebugInfoEntry::GetAttributeAddressRanges( - const DWARFUnit *cu, DWARFRangeList &ranges, bool check_hi_lo_pc, + DWARFUnit *cu, DWARFRangeList &ranges, bool check_hi_lo_pc, bool check_specification_or_abstract_origin) const { ranges.Clear(); @@ -1012,8 +1012,7 @@ DWARFDebugInfoEntry::GetQualifiedName(DWARFUnit *cu, return storage.c_str(); } -bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address, - const DWARFUnit *cu, +bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address, DWARFUnit *cu, DWARFDebugInfoEntry **function_die, DWARFDebugInfoEntry **block_die) { bool found_address = false; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h index f3952ae9598b2..f35af6e7d498a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h @@ -50,7 +50,7 @@ class DWARFDebugInfoEntry { bool Extract(const lldb_private::DWARFDataExtractor &data, const DWARFUnit *cu, lldb::offset_t *offset_ptr); - bool LookupAddress(const dw_addr_t address, const DWARFUnit *cu, + bool LookupAddress(const dw_addr_t address, DWARFUnit *cu, DWARFDebugInfoEntry **function_die, DWARFDebugInfoEntry **block_die); @@ -91,7 +91,7 @@ class DWARFDebugInfoEntry { bool check_specification_or_abstract_origin = false) const; size_t GetAttributeAddressRanges( - const DWARFUnit *cu, DWARFRangeList &ranges, bool check_hi_lo_pc, + DWARFUnit *cu, DWARFRangeList &ranges, bool check_hi_lo_pc, bool check_specification_or_abstract_origin = false) const; const char *GetName(const DWARFUnit *cu) const; @@ -116,7 +116,7 @@ class DWARFDebugInfoEntry { dw_attr_t attr, DWARFFormValue &form_value); bool GetDIENamesAndRanges( - const DWARFUnit *cu, const char *&name, const char *&mangled, + DWARFUnit *cu, const char *&name, const char *&mangled, DWARFRangeList &rangeList, int &decl_file, int &decl_line, int &decl_column, int &call_file, int &call_line, int &call_column, lldb_private::DWARFExpression *frame_base = nullptr) const; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp index 0b08fa09f9063..3b344f4509159 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp @@ -122,164 +122,3 @@ bool DWARFDebugRanges::FindRanges(const DWARFUnit *cu, } return false; } - -bool DWARFDebugRngLists::ExtractRangeList( - const DWARFDataExtractor &data, uint8_t addrSize, - lldb::offset_t *offset_ptr, std::vector &rangeList) { - rangeList.clear(); - - bool error = false; - while (!error) { - switch (data.GetU8(offset_ptr)) { - case DW_RLE_end_of_list: - return true; - - case DW_RLE_start_length: { - dw_addr_t begin = data.GetMaxU64(offset_ptr, addrSize); - dw_addr_t len = data.GetULEB128(offset_ptr); - rangeList.push_back({DW_RLE_start_length, begin, len}); - break; - } - - case DW_RLE_start_end: { - dw_addr_t begin = data.GetMaxU64(offset_ptr, addrSize); - dw_addr_t end = data.GetMaxU64(offset_ptr, addrSize); - 
rangeList.push_back({DW_RLE_start_end, begin, end}); - break; - } - - case DW_RLE_base_address: { - dw_addr_t base = data.GetMaxU64(offset_ptr, addrSize); - rangeList.push_back({DW_RLE_base_address, base, 0}); - break; - } - - case DW_RLE_offset_pair: { - dw_addr_t begin = data.GetULEB128(offset_ptr); - dw_addr_t end = data.GetULEB128(offset_ptr); - rangeList.push_back({DW_RLE_offset_pair, begin, end}); - break; - } - - case DW_RLE_base_addressx: { - dw_addr_t base = data.GetULEB128(offset_ptr); - rangeList.push_back({DW_RLE_base_addressx, base, 0}); - break; - } - - case DW_RLE_startx_endx: { - dw_addr_t start = data.GetULEB128(offset_ptr); - dw_addr_t end = data.GetULEB128(offset_ptr); - rangeList.push_back({DW_RLE_startx_endx, start, end}); - break; - } - - case DW_RLE_startx_length: { - dw_addr_t start = data.GetULEB128(offset_ptr); - dw_addr_t length = data.GetULEB128(offset_ptr); - rangeList.push_back({DW_RLE_startx_length, start, length}); - break; - } - - default: - lldbassert(0 && "unknown range list entry encoding"); - error = true; - } - } - - return false; -} - -static uint64_t ReadAddressFromDebugAddrSection(const DWARFUnit *cu, - uint32_t index) { - uint32_t index_size = cu->GetAddressByteSize(); - dw_offset_t addr_base = cu->GetAddrBase(); - lldb::offset_t offset = addr_base + index * index_size; - return cu->GetSymbolFileDWARF() - .GetDWARFContext() - .getOrLoadAddrData() - .GetMaxU64(&offset, index_size); -} - -bool DWARFDebugRngLists::FindRanges(const DWARFUnit *cu, - dw_offset_t debug_ranges_offset, - DWARFRangeList &range_list) const { - range_list.Clear(); - dw_addr_t debug_ranges_address = cu->GetRangesBase() + debug_ranges_offset; - auto pos = m_range_map.find(debug_ranges_address); - if (pos != m_range_map.end()) { - dw_addr_t BaseAddr = cu->GetBaseAddress(); - for (const RngListEntry &E : pos->second) { - switch (E.encoding) { - case DW_RLE_start_length: - range_list.Append(DWARFRangeList::Entry(E.value0, E.value1)); - break; - case DW_RLE_base_address: - BaseAddr = E.value0; - break; - case DW_RLE_start_end: - range_list.Append(DWARFRangeList::Entry(E.value0, E.value1 - E.value0)); - break; - case DW_RLE_offset_pair: - range_list.Append( - DWARFRangeList::Entry(BaseAddr + E.value0, E.value1 - E.value0)); - break; - case DW_RLE_base_addressx: { - BaseAddr = ReadAddressFromDebugAddrSection(cu, E.value0); - break; - } - case DW_RLE_startx_endx: { - dw_addr_t start = ReadAddressFromDebugAddrSection(cu, E.value0); - dw_addr_t end = ReadAddressFromDebugAddrSection(cu, E.value1); - range_list.Append(DWARFRangeList::Entry(start, end - start)); - break; - } - case DW_RLE_startx_length: { - dw_addr_t start = ReadAddressFromDebugAddrSection(cu, E.value0); - range_list.Append(DWARFRangeList::Entry(start, E.value1)); - break; - } - default: - llvm_unreachable("unexpected encoding"); - } - } - return true; - } - return false; -} - -void DWARFDebugRngLists::Extract(DWARFContext &context) { - const DWARFDataExtractor &data = context.getOrLoadRngListsData(); - lldb::offset_t offset = 0; - - uint64_t length = data.GetU32(&offset); - // FIXME: Handle DWARF64. - lldb::offset_t end = offset + length; - - // Check version. - if (data.GetU16(&offset) < 5) - return; - - uint8_t addrSize = data.GetU8(&offset); - - // We do not support non-zero segment selector size. 
- if (data.GetU8(&offset) != 0) { - lldbassert(0 && "not implemented"); - return; - } - - uint32_t offsetsAmount = data.GetU32(&offset); - for (uint32_t i = 0; i < offsetsAmount; ++i) - Offsets.push_back(data.GetMaxU64(&offset, 4)); - - lldb::offset_t listOffset = offset; - std::vector rangeList; - while (offset < end && ExtractRangeList(data, addrSize, &offset, rangeList)) { - m_range_map[listOffset] = rangeList; - listOffset = offset; - } -} - -uint64_t DWARFDebugRngLists::GetOffset(size_t Index) const { - return Offsets[Index]; -} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h index c398259056b3e..99ef04d7ee214 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h @@ -48,27 +48,4 @@ class DWARFDebugRanges final : public DWARFDebugRangesBase { range_map m_range_map; }; -// DWARF v5 .debug_rnglists section. -class DWARFDebugRngLists final : public DWARFDebugRangesBase { - struct RngListEntry { - uint8_t encoding; - uint64_t value0; - uint64_t value1; - }; - -public: - void Extract(lldb_private::DWARFContext &context) override; - bool FindRanges(const DWARFUnit *cu, dw_offset_t debug_ranges_offset, - DWARFRangeList &range_list) const override; - uint64_t GetOffset(size_t Index) const; - -protected: - bool ExtractRangeList(const lldb_private::DWARFDataExtractor &data, - uint8_t addrSize, lldb::offset_t *offset_ptr, - std::vector &list); - - std::vector Offsets; - std::map> m_range_map; -}; - #endif // SymbolFileDWARF_DWARFDebugRanges_h_ diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index 9964cf4b893c4..71375da844da7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -417,8 +417,44 @@ dw_offset_t DWARFUnit::GetLineTableOffset() { void DWARFUnit::SetAddrBase(dw_addr_t addr_base) { m_addr_base = addr_base; } +// Parse the rangelist table header, including the optional array of offsets +// following it (DWARF v5 and later). +template +static llvm::Expected +ParseListTableHeader(const llvm::DWARFDataExtractor &data, uint64_t offset, + DwarfFormat format) { + // We are expected to be called with Offset 0 or pointing just past the table + // header. Correct Offset in the latter case so that it points to the start + // of the header. 
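The rewind performed just below is sized by the list-table header. For the DWARF32 case the arithmetic works out as follows (a worked example; the 12-byte figure follows from the v5 .debug_rnglists header layout):

// DWARF32 .debug_rnglists header, as laid out by DWARF v5:
//
//   unit_length             4 bytes
//   version                 2 bytes
//   address_size            1 byte
//   segment_selector_size   1 byte
//   offset_entry_count      4 bytes
//                          --------
//   header size            12 bytes
//
// DW_AT_rnglists_base conventionally points just past this header, at the
// first offset-array entry. So a base of 0x0c means the header itself starts
// at 0x0c - 12 = 0x00, which is the position the code rewinds to before
// calling extractHeaderAndOffsets().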
+ if (offset > 0) { + uint64_t HeaderSize = llvm::DWARFListTableHeader::getHeaderSize(format); + if (offset < HeaderSize) + return llvm::createStringError(errc::invalid_argument, + "did not detect a valid" + " list table with base = 0x%" PRIx64 "\n", + offset); + offset -= HeaderSize; + } + ListTableType Table; + if (llvm::Error E = Table.extractHeaderAndOffsets(data, &offset)) + return std::move(E); + return Table; +} + void DWARFUnit::SetRangesBase(dw_addr_t ranges_base) { m_ranges_base = ranges_base; + + if (GetVersion() < 5) + return; + + if (auto table_or_error = ParseListTableHeader( + m_dwarf.GetDWARFContext().getOrLoadRngListsData().GetAsLLVM(), + ranges_base, DWARF32)) + m_rnglist_table = std::move(table_or_error.get()); + else + GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError( + "Failed to extract range list table at offset 0x%" PRIx64 ": %s", + ranges_base, toString(table_or_error.takeError()).c_str()); } void DWARFUnit::SetStrOffsetsBase(dw_offset_t str_offsets_base) { @@ -845,30 +881,56 @@ uint32_t DWARFUnit::GetHeaderByteSize() const { } llvm::Expected -DWARFUnit::FindRnglistFromOffset(dw_offset_t offset) const { - const DWARFDebugRangesBase *debug_ranges; - llvm::StringRef section; +DWARFUnit::FindRnglistFromOffset(dw_offset_t offset) { if (GetVersion() <= 4) { - debug_ranges = m_dwarf.GetDebugRanges(); - section = "debug_ranges"; - } else { - debug_ranges = m_dwarf.GetDebugRngLists(); - section = "debug_rnglists"; + const DWARFDebugRangesBase *debug_ranges = m_dwarf.GetDebugRanges(); + if (!debug_ranges) + return llvm::make_error( + "No debug_ranges section"); + DWARFRangeList ranges; + debug_ranges->FindRanges(this, offset, ranges); + return ranges; } - if (!debug_ranges) - return llvm::make_error("No " + section + - " section"); + + if (!m_rnglist_table) + return llvm::createStringError(errc::invalid_argument, + "missing or invalid range list table"); + + auto range_list_or_error = m_rnglist_table->findList( + m_dwarf.GetDWARFContext().getOrLoadRngListsData().GetAsLLVM(), offset); + if (!range_list_or_error) + return range_list_or_error.takeError(); + + llvm::Expected llvm_ranges = + range_list_or_error->getAbsoluteRanges( + llvm::object::SectionedAddress{GetBaseAddress()}, + [&](uint32_t index) { + uint32_t index_size = GetAddressByteSize(); + dw_offset_t addr_base = GetAddrBase(); + lldb::offset_t offset = addr_base + index * index_size; + return llvm::object::SectionedAddress{ + m_dwarf.GetDWARFContext().getOrLoadAddrData().GetMaxU64( + &offset, index_size)}; + }); + if (!llvm_ranges) + return llvm_ranges.takeError(); DWARFRangeList ranges; - debug_ranges->FindRanges(this, offset, ranges); + for (const llvm::DWARFAddressRange &llvm_range : *llvm_ranges) { + ranges.Append(DWARFRangeList::Entry(llvm_range.LowPC, + llvm_range.HighPC - llvm_range.LowPC)); + } return ranges; } llvm::Expected -DWARFUnit::FindRnglistFromIndex(uint32_t index) const { - const DWARFDebugRngLists *debug_rnglists = m_dwarf.GetDebugRngLists(); - if (!debug_rnglists) - return llvm::make_error( - "No debug_rnglists section"); - return FindRnglistFromOffset(debug_rnglists->GetOffset(index)); +DWARFUnit::FindRnglistFromIndex(uint32_t index) { + if (llvm::Optional offset = GetRnglistOffset(index)) + return FindRnglistFromOffset(*offset); + if (m_rnglist_table) + return llvm::createStringError(errc::invalid_argument, + "invalid range list table index %d", index); + + return llvm::createStringError(errc::invalid_argument, + "missing or invalid range list table"); } diff --git 
a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 87e0de283de4b..fe64222f8f50b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -216,12 +216,23 @@ class DWARFUnit : public lldb_private::UserID { /// Return a list of address ranges resulting from a (possibly encoded) /// range list starting at a given offset in the appropriate ranges section. - llvm::Expected FindRnglistFromOffset(dw_offset_t offset) const; + llvm::Expected FindRnglistFromOffset(dw_offset_t offset); /// Return a list of address ranges retrieved from an encoded range /// list whose offset is found via a table lookup given an index (DWARF v5 /// and later). - llvm::Expected FindRnglistFromIndex(uint32_t index) const; + llvm::Expected FindRnglistFromIndex(uint32_t index); + + /// Return a rangelist's offset based on an index. The index designates + /// an entry in the rangelist table's offset array and is supplied by + /// DW_FORM_rnglistx. + llvm::Optional GetRnglistOffset(uint32_t Index) const { + if (!m_rnglist_table) + return llvm::None; + if (llvm::Optional off = m_rnglist_table->getOffsetEntry(Index)) + return *off + m_ranges_base; + return llvm::None; + } protected: DWARFUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid, @@ -288,6 +299,9 @@ class DWARFUnit : public lldb_private::UserID { dw_offset_t m_line_table_offset = DW_INVALID_OFFSET; dw_offset_t m_str_offsets_base = 0; // Value of DW_AT_str_offsets_base. + + llvm::Optional m_rnglist_table; + const DIERef::Section m_section; private: diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index fcdff01dd20b9..9b9077a450b3a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -198,15 +198,23 @@ GetFileByIndex(const llvm::DWARFDebugLine::Prologue &prologue, size_t idx, return std::move(rel_path); } -static FileSpecList ParseSupportFilesFromPrologue( - const lldb::ModuleSP &module, - const llvm::DWARFDebugLine::Prologue &prologue, FileSpec::Style style, - llvm::StringRef compile_dir = {}, FileSpec first_file = {}) { +static FileSpecList +ParseSupportFilesFromPrologue(const lldb::ModuleSP &module, + const llvm::DWARFDebugLine::Prologue &prologue, + FileSpec::Style style, + llvm::StringRef compile_dir = {}) { FileSpecList support_files; - support_files.Append(first_file); + size_t first_file = 0; + if (prologue.getVersion() <= 4) { + // File index 0 is not valid before DWARF v5. Add a dummy entry to ensure + // support file list indices match those we get from the debug info and line + // tables. 
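The placeholder appended just below compensates for a numbering change between DWARF versions. Roughly (illustrative summary, matching the comment in the hunk):

// Line-table file numbering across DWARF versions:
//
//   v2-v4: file_names entries are numbered starting at 1; index 0 is
//          reserved, so LLDB inserts an empty FileSpec at slot 0 to keep its
//          support-file indices aligned with the indices the line table and
//          debug info actually use.
//
//   v5:    file_names entries are numbered starting at 0, and entry 0 names
//          the compilation's primary source file, so the prologue entries
//          can be appended one-for-one with no placeholder.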
+ support_files.Append(FileSpec()); + first_file = 1; + } const size_t number_of_files = prologue.FileNames.size(); - for (size_t idx = 1; idx <= number_of_files; ++idx) { + for (size_t idx = first_file; idx <= number_of_files; ++idx) { std::string remapped_file; if (auto file_path = GetFileByIndex(prologue, idx, compile_dir, style)) if (!module->RemapSourceFile(llvm::StringRef(*file_path), remapped_file)) @@ -676,21 +684,6 @@ DWARFDebugRanges *SymbolFileDWARF::GetDebugRanges() { return m_ranges.get(); } -DWARFDebugRngLists *SymbolFileDWARF::GetDebugRngLists() { - if (!m_rnglists) { - static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); - Timer scoped_timer(func_cat, "%s this = %p", LLVM_PRETTY_FUNCTION, - static_cast(this)); - - if (m_context.getOrLoadRngListsData().GetByteSize() > 0) - m_rnglists.reset(new DWARFDebugRngLists()); - - if (m_rnglists) - m_rnglists->Extract(m_context); - } - return m_rnglists.get(); -} - lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFCompileUnit &dwarf_cu) { CompUnitSP cu_sp; CompileUnit *comp_unit = (CompileUnit *)dwarf_cu.GetUserData(); @@ -1046,7 +1039,7 @@ bool SymbolFileDWARF::ParseLineTable(CompileUnit &comp_unit) { comp_unit.SetSupportFiles(ParseSupportFilesFromPrologue( comp_unit.GetModule(), line_table->Prologue, dwarf_cu->GetPathStyle(), - dwarf_cu->GetCompilationDirectory().GetCString(), FileSpec(comp_unit))); + dwarf_cu->GetCompilationDirectory().GetCString())); return true; } @@ -1949,9 +1942,8 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const FileSpec &file_spec, if (!dc_cu) continue; - const bool full_match = (bool)file_spec.GetDirectory(); bool file_spec_matches_cu_file_spec = - FileSpec::Equal(file_spec, *dc_cu, full_match); + FileSpec::Match(file_spec, dc_cu->GetPrimaryFile()); if (check_inlines || file_spec_matches_cu_file_spec) { SymbolContext sc(m_objfile_sp->GetModule()); sc.comp_unit = dc_cu; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 9e4e4279eec9f..35b18f4b02b35 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -47,7 +47,6 @@ class DWARFDebugInfo; class DWARFDebugInfoEntry; class DWARFDebugLine; class DWARFDebugRanges; -class DWARFDebugRngLists; class DWARFDeclContext; class DWARFFormValue; class DWARFTypeUnit; @@ -236,7 +235,6 @@ class SymbolFileDWARF : public lldb_private::SymbolFile, const DWARFDebugInfo *DebugInfo() const; DWARFDebugRanges *GetDebugRanges(); - DWARFDebugRngLists *GetDebugRngLists(); const lldb_private::DWARFDataExtractor &DebugLocData(); @@ -499,7 +497,6 @@ class SymbolFileDWARF : public lldb_private::SymbolFile, typedef llvm::StringMap NameToOffsetMap; NameToOffsetMap m_function_scope_qualified_name_map; std::unique_ptr m_ranges; - std::unique_ptr m_rnglists; UniqueDWARFASTTypeMap m_unique_ast_type_map; DIEToTypePtr m_die_to_type; DIEToVariableSP m_die_to_variable_sp; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index dbdbf49929412..cce666a222d07 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -604,7 +604,7 @@ SymbolFileDWARFDebugMap::CompileUnitInfo * SymbolFileDWARFDebugMap::GetCompUnitInfo(const CompileUnit &comp_unit) { const uint32_t cu_count = GetNumCompileUnits(); for (uint32_t i = 0; i < cu_count; ++i) { - if (comp_unit 
== m_compile_unit_infos[i].compile_unit_sp.get()) + if (&comp_unit == m_compile_unit_infos[i].compile_unit_sp.get()) return &m_compile_unit_infos[i]; } return nullptr; @@ -812,12 +812,8 @@ uint32_t SymbolFileDWARFDebugMap::ResolveSymbolContext( if (!resolve) { FileSpec so_file_spec; - if (GetFileSpecForSO(i, so_file_spec)) { - // Match the full path if the incoming file_spec has a directory (not - // just a basename) - const bool full_match = (bool)file_spec.GetDirectory(); - resolve = FileSpec::Equal(file_spec, so_file_spec, full_match); - } + if (GetFileSpecForSO(i, so_file_spec)) + resolve = FileSpec::Match(file_spec, so_file_spec); } if (resolve) { SymbolFileDWARF *oso_dwarf = GetSymbolFileByOSOIndex(i); diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index f0308e23c9d77..22d1b08ea9e7e 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -1110,9 +1110,7 @@ bool SymbolFileNativePDB::ParseLineTable(CompileUnit &comp_unit) { // LLDB wants the index of the file in the list of support files. auto fn_iter = llvm::find(cci->m_file_list, *efn); lldbassert(fn_iter != cci->m_file_list.end()); - // LLDB support file indices are 1-based. - uint32_t file_index = - 1 + std::distance(cci->m_file_list.begin(), fn_iter); + uint32_t file_index = std::distance(cci->m_file_list.begin(), fn_iter); std::unique_ptr sequence( line_table->CreateLineSequenceContainer()); @@ -1155,14 +1153,6 @@ bool SymbolFileNativePDB::ParseSupportFiles(CompileUnit &comp_unit, FileSpec spec(f, style); support_files.Append(spec); } - - llvm::SmallString<64> main_source_file = - m_index->compilands().GetMainSourceFile(*cci); - FileSpec::Style style = main_source_file.startswith("/") - ? FileSpec::Style::posix - : FileSpec::Style::windows; - FileSpec spec(main_source_file, style); - support_files.Insert(0, spec); return true; } diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index e7bc730ca38b8..b3e06fdd1a5db 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -371,10 +371,6 @@ bool SymbolFilePDB::ParseSupportFiles( support_files.AppendIfUnique(spec); } - // LLDB uses the DWARF-like file numeration (one based), - // the zeroth file is the compile unit itself - support_files.Insert(0, comp_unit); - return true; } @@ -1780,7 +1776,6 @@ bool SymbolFilePDB::ParseCompileUnitLineTable(CompileUnit &comp_unit, auto line_table = std::make_unique(&comp_unit); // Find contributions to `compiland` from all source and header files. 
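Several hunks in this span replace `FileSpec::Equal(a, b, full_match)` — where each call site derived `full_match` from whether the search spec carried a directory — with a single `FileSpec::Match(pattern, file)` call. The new semantics (implemented later in this diff in FileSpec.cpp) can be sketched standalone; `PathPattern` is a stand-in for lldb_private::FileSpec:

```cpp
// Minimal sketch of the FileSpec::Match semantics this patch switches to.
#include <string>

struct PathPattern {
  std::string directory; // may be empty
  std::string filename;  // may be empty
};

// Match rules mirrored from the new FileSpec::Match:
//  - a pattern with a directory must match the full path,
//  - a pattern with only a filename matches on basename,
//  - an empty pattern matches any file.
bool Match(const PathPattern &pattern, const PathPattern &file) {
  if (!pattern.directory.empty())
    return pattern.directory == file.directory &&
           pattern.filename == file.filename;
  if (!pattern.filename.empty())
    return pattern.filename == file.filename;
  return true;
}
```

This is what lets the call sites above drop the repeated `const bool full_match = (bool)file_spec.GetDirectory();` boilerplate.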
- std::string path = comp_unit.GetPath(); auto files = m_session_up->getSourceFilesForCompiland(*compiland_up); if (!files) return false; @@ -1882,9 +1877,7 @@ void SymbolFilePDB::BuildSupportFileIdToSupportFileIndexMap( if (!source_files) return; - // LLDB uses the DWARF-like file numeration (one based) - int index = 1; - + int index = 0; while (auto file = source_files->getNext()) { uint32_t source_id = file->getUniqueId(); index_map[source_id] = index++; diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp index e61e5763fabb9..d4d7a8937c127 100644 --- a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp +++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp @@ -119,14 +119,17 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp, SectionList *objfile_section_list = dsym_objfile_sp->GetSectionList(); static const SectionType g_sections[] = { - eSectionTypeDWARFDebugAbbrev, eSectionTypeDWARFDebugAddr, - eSectionTypeDWARFDebugAranges, eSectionTypeDWARFDebugCuIndex, - eSectionTypeDWARFDebugFrame, eSectionTypeDWARFDebugInfo, - eSectionTypeDWARFDebugLine, eSectionTypeDWARFDebugLoc, - eSectionTypeDWARFDebugMacInfo, eSectionTypeDWARFDebugPubNames, - eSectionTypeDWARFDebugPubTypes, eSectionTypeDWARFDebugRanges, - eSectionTypeDWARFDebugStr, eSectionTypeDWARFDebugStrOffsets, - eSectionTypeELFSymbolTable, eSectionTypeDWARFGNUDebugAltLink, + eSectionTypeDWARFDebugAbbrev, eSectionTypeDWARFDebugAddr, + eSectionTypeDWARFDebugAranges, eSectionTypeDWARFDebugCuIndex, + eSectionTypeDWARFDebugFrame, eSectionTypeDWARFDebugInfo, + eSectionTypeDWARFDebugLine, eSectionTypeDWARFDebugLineStr, + eSectionTypeDWARFDebugLoc, eSectionTypeDWARFDebugLocLists, + eSectionTypeDWARFDebugMacInfo, eSectionTypeDWARFDebugMacro, + eSectionTypeDWARFDebugNames, eSectionTypeDWARFDebugPubNames, + eSectionTypeDWARFDebugPubTypes, eSectionTypeDWARFDebugRanges, + eSectionTypeDWARFDebugRngLists, eSectionTypeDWARFDebugStr, + eSectionTypeDWARFDebugStrOffsets, eSectionTypeDWARFDebugTypes, + eSectionTypeELFSymbolTable, eSectionTypeDWARFGNUDebugAltLink, }; for (SectionType section_type : g_sections) { if (SectionSP section_sp = diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp index 244ac8ce5ff87..e6435a2611741 100644 --- a/lldb/source/Symbol/ClangASTContext.cpp +++ b/lldb/source/Symbol/ClangASTContext.cpp @@ -15,24 +15,6 @@ #include #include - -// Clang headers like to use NDEBUG inside of them to enable/disable debug -// related features using "#ifndef NDEBUG" preprocessor blocks to do one thing -// or another. This is bad because it means that if clang was built in release -// mode, it assumes that you are building in release mode which is not always -// the case. You can end up with functions that are defined as empty in header -// files when NDEBUG is not defined, and this can cause link errors with the -// clang .a files that you have since you might be missing functions in the .a -// file. So we have to define NDEBUG when including clang headers to avoid any -// mismatches. 
This is covered by rdar://problem/8691220 - -#if !defined(NDEBUG) && !defined(LLVM_NDEBUG_OFF) -#define LLDB_DEFINED_NDEBUG_FOR_CLANG -#define NDEBUG -// Need to include assert.h so it is as clang would expect it to be (disabled) -#include -#endif - #include "clang/AST/ASTContext.h" #include "clang/AST/ASTImporter.h" #include "clang/AST/Attr.h" @@ -54,13 +36,6 @@ #include "clang/Frontend/FrontendOptions.h" #include "clang/Sema/Sema.h" -#ifdef LLDB_DEFINED_NDEBUG_FOR_CLANG -#undef NDEBUG -#undef LLDB_DEFINED_NDEBUG_FOR_CLANG -// Need to re-include assert.h so it is as _we_ would expect it to be (enabled) -#include -#endif - #include "llvm/Support/Signals.h" #include "llvm/Support/Threading.h" @@ -337,6 +312,8 @@ static ClangASTMap &GetASTMap() { return *g_map_ptr; } +char ClangASTContext::ID; + bool ClangASTContext::IsOperator(llvm::StringRef name, clang::OverloadedOperatorKind &op_kind) { // All operators have to start with "operator". @@ -522,8 +499,7 @@ static void ParseLangArgs(LangOptions &Opts, InputKind IK, const char *triple) { Opts.NoInlineDefine = !Opt; } -ClangASTContext::ClangASTContext(llvm::StringRef target_triple) - : TypeSystem(TypeSystem::eKindClang) { +ClangASTContext::ClangASTContext(llvm::StringRef target_triple) { if (!target_triple.empty()) SetTargetTriple(target_triple); // The caller didn't pass an ASTContext so create a new one for this @@ -531,16 +507,14 @@ ClangASTContext::ClangASTContext(llvm::StringRef target_triple) CreateASTContext(); } -ClangASTContext::ClangASTContext(ArchSpec arch) - : TypeSystem(TypeSystem::eKindClang) { +ClangASTContext::ClangASTContext(ArchSpec arch) { SetTargetTriple(arch.GetTriple().str()); // The caller didn't pass an ASTContext so create a new one for this // ClangASTContext. CreateASTContext(); } -ClangASTContext::ClangASTContext(ASTContext &existing_ctxt) - : TypeSystem(TypeSystem::eKindClang) { +ClangASTContext::ClangASTContext(ASTContext &existing_ctxt) { SetTargetTriple(existing_ctxt.getTargetInfo().getTriple().str()); m_ast_up.reset(&existing_ctxt); @@ -563,47 +537,47 @@ uint32_t ClangASTContext::GetPluginVersion() { return 1; } lldb::TypeSystemSP ClangASTContext::CreateInstance(lldb::LanguageType language, lldb_private::Module *module, Target *target) { - if (ClangASTContextSupportsLanguage(language)) { - ArchSpec arch; - if (module) - arch = module->GetArchitecture(); - else if (target) - arch = target->GetArchitecture(); - - if (arch.IsValid()) { - ArchSpec fixed_arch = arch; - // LLVM wants this to be set to iOS or MacOSX; if we're working on - // a bare-boards type image, change the triple for llvm's benefit. 
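Besides deleting the NDEBUG workaround, this span defines `char ClangASTContext::ID;` and drops the `TypeSystem(TypeSystem::eKindClang)` base initializers; later hunks replace `dc.IsClang()` with `isa<ClangASTContext>(...)`. That is the LLVM-style RTTI pattern, sketched below with illustrative class names (only the `ID`/`classof` mechanics are taken from the patch):

```cpp
// Sketch of LLVM-style RTTI replacing the old eKindClang enum check;
// class names are illustrative, not the real LLDB hierarchy.
#include "llvm/Support/Casting.h"

class TypeSystem {
public:
  virtual ~TypeSystem() = default;
  // LLVM-style RTTI: subclasses compare against a class-unique address
  // instead of carrying an enum kind.
  virtual bool isA(const void *ClassID) const = 0;
};

class ClangTypeSystem : public TypeSystem {
public:
  static char ID; // the address of this is the unique class identifier
  bool isA(const void *ClassID) const override { return ClassID == &ID; }
  static bool classof(const TypeSystem *ts) { return ts->isA(&ID); }
};

char ClangTypeSystem::ID; // one definition, as with ClangASTContext::ID

bool IsClangContext(const TypeSystem *ts) {
  return ts && llvm::isa<ClangTypeSystem>(ts); // enabled by classof()
}
```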
- if (fixed_arch.GetTriple().getVendor() == llvm::Triple::Apple && - fixed_arch.GetTriple().getOS() == llvm::Triple::UnknownOS) { - if (fixed_arch.GetTriple().getArch() == llvm::Triple::arm || - fixed_arch.GetTriple().getArch() == llvm::Triple::aarch64 || - fixed_arch.GetTriple().getArch() == llvm::Triple::aarch64_32 || - fixed_arch.GetTriple().getArch() == llvm::Triple::thumb) { - fixed_arch.GetTriple().setOS(llvm::Triple::IOS); - } else { - fixed_arch.GetTriple().setOS(llvm::Triple::MacOSX); - } - } - - if (module) { - std::shared_ptr ast_sp( - new ClangASTContext(fixed_arch)); - return ast_sp; - } else if (target && target->IsValid()) { - std::shared_ptr ast_sp( - new ClangASTContextForExpressions(*target, fixed_arch)); - ast_sp->m_scratch_ast_source_up.reset( - new ClangASTSource(target->shared_from_this())); - lldbassert(ast_sp->getFileManager()); - ast_sp->m_scratch_ast_source_up->InstallASTContext( - *ast_sp->getASTContext(), *ast_sp->getFileManager(), true); - llvm::IntrusiveRefCntPtr proxy_ast_source( - ast_sp->m_scratch_ast_source_up->CreateProxy()); - ast_sp->SetExternalSource(proxy_ast_source); - return ast_sp; - } - } + if (!ClangASTContextSupportsLanguage(language)) + return lldb::TypeSystemSP(); + ArchSpec arch; + if (module) + arch = module->GetArchitecture(); + else if (target) + arch = target->GetArchitecture(); + + if (!arch.IsValid()) + return lldb::TypeSystemSP(); + + ArchSpec fixed_arch = arch; + // LLVM wants this to be set to iOS or MacOSX; if we're working on + // a bare-boards type image, change the triple for llvm's benefit. + if (fixed_arch.GetTriple().getVendor() == llvm::Triple::Apple && + fixed_arch.GetTriple().getOS() == llvm::Triple::UnknownOS) { + if (fixed_arch.GetTriple().getArch() == llvm::Triple::arm || + fixed_arch.GetTriple().getArch() == llvm::Triple::aarch64 || + fixed_arch.GetTriple().getArch() == llvm::Triple::aarch64_32 || + fixed_arch.GetTriple().getArch() == llvm::Triple::thumb) { + fixed_arch.GetTriple().setOS(llvm::Triple::IOS); + } else { + fixed_arch.GetTriple().setOS(llvm::Triple::MacOSX); + } + } + + if (module) { + std::shared_ptr ast_sp(new ClangASTContext(fixed_arch)); + return ast_sp; + } else if (target && target->IsValid()) { + std::shared_ptr ast_sp( + new ClangASTContextForExpressions(*target, fixed_arch)); + ast_sp->m_scratch_ast_source_up.reset( + new ClangASTSource(target->shared_from_this())); + lldbassert(ast_sp->getFileManager()); + ast_sp->m_scratch_ast_source_up->InstallASTContext( + *ast_sp, *ast_sp->getFileManager(), true); + llvm::IntrusiveRefCntPtr proxy_ast_source( + ast_sp->m_scratch_ast_source_up->CreateProxy()); + ast_sp->SetExternalSource(proxy_ast_source); + return ast_sp; } return lldb::TypeSystemSP(); } @@ -844,77 +818,62 @@ static inline bool QualTypeMatchesBitSize(const uint64_t bit_size, CompilerType ClangASTContext::GetBuiltinTypeForEncodingAndBitSize(Encoding encoding, size_t bit_size) { - return ClangASTContext::GetBuiltinTypeForEncodingAndBitSize( - getASTContext(), encoding, bit_size); -} - -CompilerType ClangASTContext::GetBuiltinTypeForEncodingAndBitSize( - ASTContext *ast, Encoding encoding, uint32_t bit_size) { - auto *clang_ast_context = ClangASTContext::GetASTContext(ast); + ASTContext *ast = this->getASTContext(); if (!ast) return CompilerType(); switch (encoding) { case eEncodingInvalid: if (QualTypeMatchesBitSize(bit_size, ast, ast->VoidPtrTy)) - return CompilerType(clang_ast_context, ast->VoidPtrTy.getAsOpaquePtr()); + return CompilerType(this, ast->VoidPtrTy.getAsOpaquePtr()); break; case 
eEncodingUint: if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedCharTy)) - return CompilerType(clang_ast_context, - ast->UnsignedCharTy.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedCharTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedShortTy)) - return CompilerType(clang_ast_context, - ast->UnsignedShortTy.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedShortTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedIntTy)) - return CompilerType(clang_ast_context, - ast->UnsignedIntTy.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedIntTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedLongTy)) - return CompilerType(clang_ast_context, - ast->UnsignedLongTy.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedLongTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedLongLongTy)) - return CompilerType(clang_ast_context, - ast->UnsignedLongLongTy.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedLongLongTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedInt128Ty)) - return CompilerType(clang_ast_context, - ast->UnsignedInt128Ty.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedInt128Ty.getAsOpaquePtr()); break; case eEncodingSint: if (QualTypeMatchesBitSize(bit_size, ast, ast->SignedCharTy)) - return CompilerType(clang_ast_context, - ast->SignedCharTy.getAsOpaquePtr()); + return CompilerType(this, ast->SignedCharTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->ShortTy)) - return CompilerType(clang_ast_context, ast->ShortTy.getAsOpaquePtr()); + return CompilerType(this, ast->ShortTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->IntTy)) - return CompilerType(clang_ast_context, ast->IntTy.getAsOpaquePtr()); + return CompilerType(this, ast->IntTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->LongTy)) - return CompilerType(clang_ast_context, ast->LongTy.getAsOpaquePtr()); + return CompilerType(this, ast->LongTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->LongLongTy)) - return CompilerType(clang_ast_context, ast->LongLongTy.getAsOpaquePtr()); + return CompilerType(this, ast->LongLongTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->Int128Ty)) - return CompilerType(clang_ast_context, ast->Int128Ty.getAsOpaquePtr()); + return CompilerType(this, ast->Int128Ty.getAsOpaquePtr()); break; case eEncodingIEEE754: if (QualTypeMatchesBitSize(bit_size, ast, ast->FloatTy)) - return CompilerType(clang_ast_context, ast->FloatTy.getAsOpaquePtr()); + return CompilerType(this, ast->FloatTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->DoubleTy)) - return CompilerType(clang_ast_context, ast->DoubleTy.getAsOpaquePtr()); + return CompilerType(this, ast->DoubleTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->LongDoubleTy)) - return CompilerType(clang_ast_context, - ast->LongDoubleTy.getAsOpaquePtr()); + return CompilerType(this, ast->LongDoubleTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->HalfTy)) - return CompilerType(clang_ast_context, ast->HalfTy.getAsOpaquePtr()); + return CompilerType(this, ast->HalfTy.getAsOpaquePtr()); break; case eEncodingVector: // Sanity check that bit_size is a multiple of 8's. 
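The refactor above folds the static GetBuiltinTypeForEncodingAndBitSize helper into the member function so `this` supplies the context, but the lookup logic is unchanged: for each encoding, probe the candidate builtin types in order and return the first whose bit width matches. A simplified table-driven sketch of that idea — the table and names are hypothetical, not the clang ASTContext types:

```cpp
// Simplified stand-in for the encoding/bit-size lookup above: probe
// candidates in order, first width match wins.
#include <cstdint>
#include <string>
#include <vector>

enum class Encoding { Uint, Sint, IEEE754 };

struct Builtin {
  Encoding encoding;
  unsigned bit_size;
  std::string name;
};

// Plays the role of ast->UnsignedCharTy, ast->ShortTy, ast->FloatTy, ...
const std::vector<Builtin> kBuiltins = {
    {Encoding::Uint, 8, "unsigned char"}, {Encoding::Uint, 16, "unsigned short"},
    {Encoding::Uint, 32, "unsigned int"}, {Encoding::Uint, 64, "unsigned long long"},
    {Encoding::Sint, 8, "signed char"},   {Encoding::Sint, 16, "short"},
    {Encoding::Sint, 32, "int"},          {Encoding::Sint, 64, "long long"},
    {Encoding::IEEE754, 32, "float"},     {Encoding::IEEE754, 64, "double"},
};

const Builtin *FindBuiltin(Encoding encoding, unsigned bit_size) {
  for (const Builtin &b : kBuiltins)
    if (b.encoding == encoding && b.bit_size == bit_size)
      return &b; // first match wins, as in the switch above
  return nullptr; // no builtin of that width: caller gets an empty type
}
```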
if (bit_size && !(bit_size & 0x7u)) return CompilerType( - clang_ast_context, - ast->getExtVectorType(ast->UnsignedCharTy, bit_size / 8) - .getAsOpaquePtr()); + this, ast->getExtVectorType(ast->UnsignedCharTy, bit_size / 8) + .getAsOpaquePtr()); break; } @@ -987,11 +946,6 @@ ClangASTContext::GetBasicTypeEnumeration(ConstString name) { return eBasicTypeInvalid; } -CompilerType ClangASTContext::GetBasicType(ConstString name) { - lldb::BasicType basic_type = ClangASTContext::GetBasicTypeEnumeration(name); - return GetBasicType(basic_type); -} - uint32_t ClangASTContext::GetPointerByteSize() { if (m_pointer_byte_size == 0) if (auto size = GetBasicType(lldb::eBasicTypeVoid) @@ -10200,16 +10154,20 @@ bool ClangASTContext::DeclContextIsContainedInLookup( return false; } +static bool IsClangDeclContext(const CompilerDeclContext &dc) { + return dc.IsValid() && isa(dc.GetTypeSystem()); +} + clang::DeclContext * ClangASTContext::DeclContextGetAsDeclContext(const CompilerDeclContext &dc) { - if (dc.IsClang()) + if (IsClangDeclContext(dc)) return (clang::DeclContext *)dc.GetOpaqueDeclContext(); return nullptr; } ObjCMethodDecl * ClangASTContext::DeclContextGetAsObjCMethodDecl(const CompilerDeclContext &dc) { - if (dc.IsClang()) + if (IsClangDeclContext(dc)) return llvm::dyn_cast( (clang::DeclContext *)dc.GetOpaqueDeclContext()); return nullptr; @@ -10217,7 +10175,7 @@ ClangASTContext::DeclContextGetAsObjCMethodDecl(const CompilerDeclContext &dc) { CXXMethodDecl * ClangASTContext::DeclContextGetAsCXXMethodDecl(const CompilerDeclContext &dc) { - if (dc.IsClang()) + if (IsClangDeclContext(dc)) return llvm::dyn_cast( (clang::DeclContext *)dc.GetOpaqueDeclContext()); return nullptr; @@ -10225,7 +10183,7 @@ ClangASTContext::DeclContextGetAsCXXMethodDecl(const CompilerDeclContext &dc) { clang::FunctionDecl * ClangASTContext::DeclContextGetAsFunctionDecl(const CompilerDeclContext &dc) { - if (dc.IsClang()) + if (IsClangDeclContext(dc)) return llvm::dyn_cast( (clang::DeclContext *)dc.GetOpaqueDeclContext()); return nullptr; @@ -10233,7 +10191,7 @@ ClangASTContext::DeclContextGetAsFunctionDecl(const CompilerDeclContext &dc) { clang::NamespaceDecl * ClangASTContext::DeclContextGetAsNamespaceDecl(const CompilerDeclContext &dc) { - if (dc.IsClang()) + if (IsClangDeclContext(dc)) return llvm::dyn_cast( (clang::DeclContext *)dc.GetOpaqueDeclContext()); return nullptr; diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index b37636c3bafc1..b05036e27fcf2 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -21,30 +21,21 @@ CompileUnit::CompileUnit(const lldb::ModuleSP &module_sp, void *user_data, const char *pathname, const lldb::user_id_t cu_sym_id, lldb::LanguageType language, lldb_private::LazyBool is_optimized) - : ModuleChild(module_sp), FileSpec(pathname), UserID(cu_sym_id), - m_user_data(user_data), m_language(language), m_flags(0), - m_support_files(), m_line_table_up(), m_variables(), - m_is_optimized(is_optimized) { - if (language != eLanguageTypeUnknown) - m_flags.Set(flagsParsedLanguage); - assert(module_sp); -} + : CompileUnit(module_sp, user_data, FileSpec(pathname), cu_sym_id, language, + is_optimized) {} CompileUnit::CompileUnit(const lldb::ModuleSP &module_sp, void *user_data, const FileSpec &fspec, const lldb::user_id_t cu_sym_id, lldb::LanguageType language, lldb_private::LazyBool is_optimized) - : ModuleChild(module_sp), FileSpec(fspec), UserID(cu_sym_id), - m_user_data(user_data), m_language(language), m_flags(0), - 
m_support_files(), m_line_table_up(), m_variables(), + : ModuleChild(module_sp), UserID(cu_sym_id), m_user_data(user_data), + m_language(language), m_flags(0), m_file_spec(fspec), m_is_optimized(is_optimized) { if (language != eLanguageTypeUnknown) m_flags.Set(flagsParsedLanguage); assert(module_sp); } -CompileUnit::~CompileUnit() {} - void CompileUnit::CalculateSymbolContext(SymbolContext *sc) { sc->comp_unit = this; GetModule()->CalculateSymbolContext(sc); @@ -63,7 +54,7 @@ void CompileUnit::GetDescription(Stream *s, lldb::DescriptionLevel level) const { const char *language = Language::GetNameForLanguageType(m_language); *s << "id = " << (const UserID &)*this << ", file = \"" - << (const FileSpec &)*this << "\", language = \"" << language << '"'; + << this->GetPrimaryFile() << "\", language = \"" << language << '"'; } void CompileUnit::ForeachFunction( @@ -117,8 +108,7 @@ void CompileUnit::Dump(Stream *s, bool show_context) const { s->Printf("%p: ", static_cast(this)); s->Indent(); *s << "CompileUnit" << static_cast(*this) << ", language = \"" - << language << "\", file = '" << static_cast(*this) - << "'\n"; + << language << "\", file = '" << GetPrimaryFile() << "'\n"; // m_types.Dump(s); @@ -217,53 +207,50 @@ VariableListSP CompileUnit::GetVariableList(bool can_create) { return m_variables; } +std::vector FindFileIndexes(const FileSpecList &files, const FileSpec &file) { + std::vector result; + uint32_t idx = -1; + while ((idx = files.FindFileIndex(idx + 1, file, /*full=*/true)) != + UINT32_MAX) + result.push_back(idx); + return result; +} + uint32_t CompileUnit::FindLineEntry(uint32_t start_idx, uint32_t line, const FileSpec *file_spec_ptr, bool exact, LineEntry *line_entry_ptr) { - uint32_t file_idx = 0; + if (!file_spec_ptr) + file_spec_ptr = &GetPrimaryFile(); + std::vector file_indexes = FindFileIndexes(GetSupportFiles(), *file_spec_ptr); + if (file_indexes.empty()) + return UINT32_MAX; - if (file_spec_ptr) { - file_idx = GetSupportFiles().FindFileIndex(1, *file_spec_ptr, true); - if (file_idx == UINT32_MAX) - return UINT32_MAX; - } else { - // All the line table entries actually point to the version of the Compile - // Unit that is in the support files (the one at 0 was artificially added.) - // So prefer the one further on in the support files if it exists... - const FileSpecList &support_files = GetSupportFiles(); - const bool full = true; - file_idx = support_files.FindFileIndex( - 1, support_files.GetFileSpecAtIndex(0), full); - if (file_idx == UINT32_MAX) - file_idx = 0; - } LineTable *line_table = GetLineTable(); if (line_table) - return line_table->FindLineEntryIndexByFileIndex(start_idx, file_idx, line, - exact, line_entry_ptr); + return line_table->FindLineEntryIndexByFileIndex( + start_idx, file_indexes, line, exact, line_entry_ptr); return UINT32_MAX; } -uint32_t CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, - uint32_t line, bool check_inlines, - bool exact, - SymbolContextItem resolve_scope, - SymbolContextList &sc_list) { +void CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, + uint32_t line, bool check_inlines, + bool exact, + SymbolContextItem resolve_scope, + SymbolContextList &sc_list) { // First find all of the file indexes that match our "file_spec". 
If // "file_spec" has an empty directory, then only compare the basenames when // finding file indexes std::vector file_indexes; - const bool full_match = (bool)file_spec.GetDirectory(); bool file_spec_matches_cu_file_spec = - FileSpec::Equal(file_spec, *this, full_match); + FileSpec::Match(file_spec, this->GetPrimaryFile()); // If we are not looking for inlined functions and our file spec doesn't // match then we are done... if (!file_spec_matches_cu_file_spec && !check_inlines) - return 0; + return; uint32_t file_idx = - GetSupportFiles().FindFileIndex(1, file_spec, true); + GetSupportFiles().FindFileIndex(0, file_spec, true); while (file_idx != UINT32_MAX) { file_indexes.push_back(file_idx); file_idx = GetSupportFiles().FindFileIndex(file_idx + 1, file_spec, true); @@ -271,84 +258,67 @@ uint32_t CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, const size_t num_file_indexes = file_indexes.size(); if (num_file_indexes == 0) - return 0; - - const uint32_t prev_size = sc_list.GetSize(); + return; SymbolContext sc(GetModule()); sc.comp_unit = this; - if (line != 0) { - LineTable *line_table = sc.comp_unit->GetLineTable(); - - if (line_table != nullptr) { - uint32_t found_line; - uint32_t line_idx; - - if (num_file_indexes == 1) { - // We only have a single support file that matches, so use the line - // table function that searches for a line entries that match a single - // support file index - LineEntry line_entry; - line_idx = line_table->FindLineEntryIndexByFileIndex( - 0, file_indexes.front(), line, exact, &line_entry); - - // If "exact == true", then "found_line" will be the same as "line". If - // "exact == false", the "found_line" will be the closest line entry - // with a line number greater than "line" and we will use this for our - // subsequent line exact matches below. - found_line = line_entry.line; - - while (line_idx != UINT32_MAX) { - // If they only asked for the line entry, then we're done, we can - // just copy that over. But if they wanted more than just the line - // number, fill it in. - if (resolve_scope == eSymbolContextLineEntry) { - sc.line_entry = line_entry; - } else { - line_entry.range.GetBaseAddress().CalculateSymbolContext( - &sc, resolve_scope); - } - - sc_list.Append(sc); - line_idx = line_table->FindLineEntryIndexByFileIndex( - line_idx + 1, file_indexes.front(), found_line, true, - &line_entry); - } - } else { - // We found multiple support files that match "file_spec" so use the - // line table function that searches for a line entries that match a - // multiple support file indexes. - LineEntry line_entry; - line_idx = line_table->FindLineEntryIndexByFileIndex( - 0, file_indexes, line, exact, &line_entry); - - // If "exact == true", then "found_line" will be the same as "line". If - // "exact == false", the "found_line" will be the closest line entry - // with a line number greater than "line" and we will use this for our - // subsequent line exact matches below. 
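The new FindFileIndexes helper above collects every support-file index matching a FileSpec, so FindLineEntry can search the line table across all of them instead of the old special-casing around index 0. A standalone sketch of the helper, with std::string standing in for FileSpec and UINT32_MAX kept as the "not found" sentinel the real FileSpecList::FindFileIndex uses:

```cpp
// Sketch of the FindFileIndexes() pattern introduced above.
#include <cstdint>
#include <string>
#include <vector>

uint32_t FindFileIndex(const std::vector<std::string> &files, uint32_t start,
                       const std::string &file) {
  for (uint32_t i = start; i < files.size(); ++i)
    if (files[i] == file)
      return i;
  return UINT32_MAX; // sentinel: no further match
}

std::vector<uint32_t> FindFileIndexes(const std::vector<std::string> &files,
                                      const std::string &file) {
  std::vector<uint32_t> result;
  uint32_t idx = -1; // wraps so the first probe starts at index 0
  while ((idx = FindFileIndex(files, idx + 1, file)) != UINT32_MAX)
    result.push_back(idx);
  return result;
}
```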
- found_line = line_entry.line; - - while (line_idx != UINT32_MAX) { - if (resolve_scope == eSymbolContextLineEntry) { - sc.line_entry = line_entry; - } else { - line_entry.range.GetBaseAddress().CalculateSymbolContext( - &sc, resolve_scope); - } - - sc_list.Append(sc); - line_idx = line_table->FindLineEntryIndexByFileIndex( - line_idx + 1, file_indexes, found_line, true, &line_entry); - } - } + if (line == 0) { + if (file_spec_matches_cu_file_spec && !check_inlines) { + // only append the context if we aren't looking for inline call sites by + // file and line and if the file spec matches that of the compile unit + sc_list.Append(sc); } - } else if (file_spec_matches_cu_file_spec && !check_inlines) { - // only append the context if we aren't looking for inline call sites by - // file and line and if the file spec matches that of the compile unit + return; + } + + LineTable *line_table = sc.comp_unit->GetLineTable(); + + if (line_table == nullptr) + return; + + uint32_t line_idx; + LineEntry line_entry; + + if (num_file_indexes == 1) { + // We only have a single support file that matches, so use the line + // table function that searches for a line entries that match a single + // support file index + line_idx = line_table->FindLineEntryIndexByFileIndex( + 0, file_indexes.front(), line, exact, &line_entry); + } else { + // We found multiple support files that match "file_spec" so use the + // line table function that searches for a line entries that match a + // multiple support file indexes. + line_idx = line_table->FindLineEntryIndexByFileIndex(0, file_indexes, line, + exact, &line_entry); + } + + // If "exact == true", then "found_line" will be the same as "line". If + // "exact == false", the "found_line" will be the closest line entry + // with a line number greater than "line" and we will use this for our + // subsequent line exact matches below. + uint32_t found_line = line_entry.line; + + while (line_idx != UINT32_MAX) { + // If they only asked for the line entry, then we're done, we can + // just copy that over. But if they wanted more than just the line + // number, fill it in. 
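The rewritten ResolveSymbolContext collapses the duplicated single-index/multi-index branches into one loop: the first query may be inexact (closest line greater than the requested one), and every follow-up query re-searches for that found line exactly. The iteration pattern can be sketched on its own — `FindFn` below is a stand-in for LineTable::FindLineEntryIndexByFileIndex, not the real signature:

```cpp
// Sketch of the line-entry iteration pattern in the rewrite above.
#include <cstdint>
#include <functional>
#include <vector>

struct LineEntry { uint32_t line = 0; };

using FindFn = std::function<uint32_t(uint32_t start_idx, uint32_t line,
                                      bool exact, LineEntry *entry)>;

void CollectMatches(const FindFn &find, uint32_t line, bool exact,
                    std::vector<LineEntry> &out) {
  LineEntry entry;
  uint32_t idx = find(0, line, exact, &entry);
  // With exact == false the first hit may be the closest line > "line";
  // every subsequent query then matches that found line exactly.
  uint32_t found_line = entry.line;
  while (idx != UINT32_MAX) {
    out.push_back(entry);
    idx = find(idx + 1, found_line, /*exact=*/true, &entry);
  }
}
```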
+ if (resolve_scope == eSymbolContextLineEntry) { + sc.line_entry = line_entry; + } else { + line_entry.range.GetBaseAddress().CalculateSymbolContext(&sc, + resolve_scope); + } + sc_list.Append(sc); + if (num_file_indexes == 1) + line_idx = line_table->FindLineEntryIndexByFileIndex( + line_idx + 1, file_indexes.front(), found_line, true, &line_entry); + else + line_idx = line_table->FindLineEntryIndexByFileIndex( + line_idx + 1, file_indexes, found_line, true, &line_entry); } - return sc_list.GetSize() - prev_size; } bool CompileUnit::GetIsOptimized() { diff --git a/lldb/source/Symbol/CompilerDecl.cpp b/lldb/source/Symbol/CompilerDecl.cpp index 2c64113a2bbeb..48d9169c1a7a2 100644 --- a/lldb/source/Symbol/CompilerDecl.cpp +++ b/lldb/source/Symbol/CompilerDecl.cpp @@ -12,10 +12,6 @@ using namespace lldb_private; -bool CompilerDecl::IsClang() const { - return IsValid() && m_type_system->getKind() == TypeSystem::eKindClang; -} - ConstString CompilerDecl::GetName() const { return m_type_system->DeclGetName(m_opaque_decl); } diff --git a/lldb/source/Symbol/CompilerDeclContext.cpp b/lldb/source/Symbol/CompilerDeclContext.cpp index a6f046c4eb22e..672de6ec34d1e 100644 --- a/lldb/source/Symbol/CompilerDeclContext.cpp +++ b/lldb/source/Symbol/CompilerDeclContext.cpp @@ -23,10 +23,6 @@ CompilerDeclContext::FindDeclByName(ConstString name, return std::vector(); } -bool CompilerDeclContext::IsClang() const { - return IsValid() && m_type_system->getKind() == TypeSystem::eKindClang; -} - ConstString CompilerDeclContext::GetName() const { if (IsValid()) return m_type_system->DeclContextGetName(m_opaque_decl_ctx); diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp index 571a8570a43b3..d35213120b4dc 100644 --- a/lldb/source/Symbol/CompilerType.cpp +++ b/lldb/source/Symbol/CompilerType.cpp @@ -874,173 +874,6 @@ bool CompilerType::GetValueAsScalar(const lldb_private::DataExtractor &data, return false; } -bool CompilerType::SetValueFromScalar(const Scalar &value, Stream &strm) { - if (!IsValid()) - return false; - - // Aggregate types don't have scalar values - if (!IsAggregateType()) { - strm.GetFlags().Set(Stream::eBinary); - uint64_t count = 0; - lldb::Encoding encoding = GetEncoding(count); - - if (encoding == lldb::eEncodingInvalid || count != 1) - return false; - - llvm::Optional bit_width = GetBitSize(nullptr); - if (!bit_width) - return false; - - // This function doesn't currently handle non-byte aligned assignments - if ((*bit_width % 8) != 0) - return false; - - const uint64_t byte_size = (*bit_width + 7) / 8; - switch (encoding) { - case lldb::eEncodingInvalid: - break; - case lldb::eEncodingVector: - break; - case lldb::eEncodingUint: - switch (byte_size) { - case 1: - strm.PutHex8(value.UInt()); - return true; - case 2: - strm.PutHex16(value.UInt()); - return true; - case 4: - strm.PutHex32(value.UInt()); - return true; - case 8: - strm.PutHex64(value.ULongLong()); - return true; - default: - break; - } - break; - - case lldb::eEncodingSint: - switch (byte_size) { - case 1: - strm.PutHex8(value.SInt()); - return true; - case 2: - strm.PutHex16(value.SInt()); - return true; - case 4: - strm.PutHex32(value.SInt()); - return true; - case 8: - strm.PutHex64(value.SLongLong()); - return true; - default: - break; - } - break; - - case lldb::eEncodingIEEE754: - if (byte_size <= sizeof(long double)) { - if (byte_size == sizeof(float)) { - strm.PutFloat(value.Float()); - return true; - } else if (byte_size == sizeof(double)) { - strm.PutDouble(value.Double()); - return 
true; - } else if (byte_size == sizeof(long double)) { - strm.PutDouble(value.LongDouble()); - return true; - } - } - break; - } - } - return false; -} - -bool CompilerType::ReadFromMemory(lldb_private::ExecutionContext *exe_ctx, - lldb::addr_t addr, AddressType address_type, - lldb_private::DataExtractor &data) { - if (!IsValid()) - return false; - - // Can't convert a file address to anything valid without more context (which - // Module it came from) - if (address_type == eAddressTypeFile) - return false; - - if (!GetCompleteType()) - return false; - - auto byte_size = - GetByteSize(exe_ctx ? exe_ctx->GetBestExecutionContextScope() : nullptr); - if (!byte_size) - return false; - - if (data.GetByteSize() < *byte_size) { - lldb::DataBufferSP data_sp(new DataBufferHeap(*byte_size, '\0')); - data.SetData(data_sp); - } - - uint8_t *dst = const_cast(data.PeekData(0, *byte_size)); - if (dst != nullptr) { - if (address_type == eAddressTypeHost) { - if (addr == 0) - return false; - // The address is an address in this process, so just copy it - memcpy(dst, reinterpret_cast(addr), *byte_size); - return true; - } else { - Process *process = nullptr; - if (exe_ctx) - process = exe_ctx->GetProcessPtr(); - if (process) { - Status error; - return process->ReadMemory(addr, dst, *byte_size, error) == *byte_size; - } - } - } - return false; -} - -bool CompilerType::WriteToMemory(lldb_private::ExecutionContext *exe_ctx, - lldb::addr_t addr, AddressType address_type, - StreamString &new_value) { - if (!IsValid()) - return false; - - // Can't convert a file address to anything valid without more context (which - // Module it came from) - if (address_type == eAddressTypeFile) - return false; - - if (!GetCompleteType()) - return false; - - auto byte_size = - GetByteSize(exe_ctx ? 
exe_ctx->GetBestExecutionContextScope() : nullptr); - if (!byte_size) - return false; - - if (*byte_size > 0) { - if (address_type == eAddressTypeHost) { - // The address is an address in this process, so just copy it - memcpy((void *)addr, new_value.GetData(), *byte_size); - return true; - } else { - Process *process = nullptr; - if (exe_ctx) - process = exe_ctx->GetProcessPtr(); - if (process) { - Status error; - return process->WriteMemory(addr, new_value.GetData(), *byte_size, - error) == *byte_size; - } - } - } - return false; -} - bool lldb_private::operator==(const lldb_private::CompilerType &lhs, const lldb_private::CompilerType &rhs) { return lhs.GetTypeSystem() == rhs.GetTypeSystem() && diff --git a/lldb/source/Symbol/Declaration.cpp b/lldb/source/Symbol/Declaration.cpp index d78ba967d280b..4d0975d34256c 100644 --- a/lldb/source/Symbol/Declaration.cpp +++ b/lldb/source/Symbol/Declaration.cpp @@ -90,12 +90,9 @@ bool Declaration::FileAndLineEqual(const Declaration &declaration) const { bool lldb_private::operator==(const Declaration &lhs, const Declaration &rhs) { #ifdef LLDB_ENABLE_DECLARATION_COLUMNS - if (lhs.GetColumn() == rhs.GetColumn()) - if (lhs.GetLine() == rhs.GetLine()) - return lhs.GetFile() == rhs.GetFile(); + if (lhs.GetColumn() != rhs.GetColumn()) + return false; #else - if (lhs.GetLine() == rhs.GetLine()) - return FileSpec::Equal(lhs.GetFile(), rhs.GetFile(), true); + return lhs.GetLine() == rhs.GetLine() && lhs.GetFile() == rhs.GetFile(); #endif - return false; } diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp index 9e81b6140eb76..c392317df0066 100644 --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -340,7 +340,8 @@ Block &Function::GetBlock(bool can_create) { "error: unable to find module " "shared pointer for function '%s' " "in %s\n", - GetName().GetCString(), m_comp_unit->GetPath().c_str()); + GetName().GetCString(), + m_comp_unit->GetPrimaryFile().GetPath().c_str()); } m_block.SetBlockInfoHasBeenParsed(true, true); } diff --git a/lldb/source/Symbol/LineTable.cpp b/lldb/source/Symbol/LineTable.cpp index 1433dc156d915..fecc90c409f22 100644 --- a/lldb/source/Symbol/LineTable.cpp +++ b/lldb/source/Symbol/LineTable.cpp @@ -34,11 +34,9 @@ void LineTable::InsertLineEntry(lldb::addr_t file_addr, uint32_t line, is_start_of_basic_block, is_prologue_end, is_epilogue_begin, is_terminal_entry); - entry_collection::iterator begin_pos = m_entries.begin(); - entry_collection::iterator end_pos = m_entries.end(); LineTable::Entry::LessThanBinaryPredicate less_than_bp(this); entry_collection::iterator pos = - upper_bound(begin_pos, end_pos, entry, less_than_bp); + llvm::upper_bound(m_entries, entry, less_than_bp); // Stream s(stdout); // s << "\n\nBefore:\n"; @@ -289,8 +287,6 @@ uint32_t LineTable::FindLineEntryIndexByFileIndex( uint32_t line, bool exact, LineEntry *line_entry_ptr) { const size_t count = m_entries.size(); - std::vector::const_iterator begin_pos = file_indexes.begin(); - std::vector::const_iterator end_pos = file_indexes.end(); size_t best_match = UINT32_MAX; for (size_t idx = start_idx; idx < count; ++idx) { @@ -299,7 +295,7 @@ uint32_t LineTable::FindLineEntryIndexByFileIndex( if (m_entries[idx].is_terminal_entry) continue; - if (find(begin_pos, end_pos, m_entries[idx].file_idx) == end_pos) + if (llvm::find(file_indexes, m_entries[idx].file_idx) == file_indexes.end()) continue; // Exact match always wins. 
Otherwise try to find the closest line > the diff --git a/lldb/source/Symbol/LocateSymbolFile.cpp b/lldb/source/Symbol/LocateSymbolFile.cpp index 0d0e5300668fc..d2b39d6acd704 100644 --- a/lldb/source/Symbol/LocateSymbolFile.cpp +++ b/lldb/source/Symbol/LocateSymbolFile.cpp @@ -230,19 +230,19 @@ static FileSpec LocateExecutableSymbolFileDsym(const ModuleSpec &module_spec) { ModuleSpec Symbols::LocateExecutableObjectFile(const ModuleSpec &module_spec) { ModuleSpec result; - const FileSpec *exec_fspec = module_spec.GetFileSpecPtr(); + const FileSpec &exec_fspec = module_spec.GetFileSpec(); const ArchSpec *arch = module_spec.GetArchitecturePtr(); const UUID *uuid = module_spec.GetUUIDPtr(); static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); Timer scoped_timer( func_cat, "LocateExecutableObjectFile (file = %s, arch = %s, uuid = %p)", - exec_fspec ? exec_fspec->GetFilename().AsCString("") : "", + exec_fspec ? exec_fspec.GetFilename().AsCString("") : "", arch ? arch->GetArchitectureName() : "", (const void *)uuid); ModuleSpecList module_specs; ModuleSpec matched_module_spec; if (exec_fspec && - ObjectFile::GetModuleSpecifications(*exec_fspec, 0, 0, module_specs) && + ObjectFile::GetModuleSpecifications(exec_fspec, 0, 0, module_specs) && module_specs.FindMatchingModuleSpec(module_spec, matched_module_spec)) { result.GetFileSpec() = exec_fspec; } else { diff --git a/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp b/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp index 74718a8c5e307..5ee632ec20773 100644 --- a/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp +++ b/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp @@ -595,7 +595,7 @@ bool Symbols::DownloadObjectAndSymbolFile(ModuleSpec &module_spec, } Status error = Host::RunShellCommand( command.GetData(), - NULL, // current working directory + FileSpec(), // current working directory &exit_status, // Exit status &signo, // Signal int * &command_output, // Command output diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 38bc7722d0d02..812c6de4da52b 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -360,6 +360,7 @@ AddressClass ObjectFile::GetAddressClass(addr_t file_addr) { case eSectionTypeDWARFDebugPubTypes: case eSectionTypeDWARFDebugRanges: case eSectionTypeDWARFDebugRngLists: + case eSectionTypeDWARFDebugRngListsDwo: case eSectionTypeDWARFDebugStr: case eSectionTypeDWARFDebugStrDwo: case eSectionTypeDWARFDebugStrOffsets: @@ -476,7 +477,13 @@ size_t ObjectFile::GetData(lldb::offset_t offset, size_t length, DataExtractor &data) const { // The entire file has already been mmap'ed into m_data, so just copy from // there as the back mmap buffer will be shared with shared pointers. - return data.SetData(m_data, offset, length); + size_t ret = data.SetData(m_data, offset, length); + // DataExtractor::SetData copies the address byte size from m_data, but + // m_data's address byte size is only set from sizeof(void*), and we can't + // access subclasses GetAddressByteSize() when setting up m_data in the + // constructor. 
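The ObjectFile::GetData fix in this span addresses a subtle default: DataExtractor::SetData copies the address byte size from the source extractor, but `m_data` was initialized with the host's `sizeof(void*)` before the subclass could report the target's real width. A minimal sketch of the pitfall and the fix, with `Extractor`/`ObjFile` as stand-ins:

```cpp
// Sketch of the address-byte-size pitfall fixed in ObjectFile::GetData.
#include <cstddef>

struct Extractor {
  unsigned addr_byte_size = sizeof(void *); // host default, may be wrong
  void SetData(const Extractor &src) {
    // Like DataExtractor::SetData, this copies the source's address size...
    addr_byte_size = src.addr_byte_size;
  }
};

struct ObjFile {
  Extractor m_data; // set up in the constructor, before the subclass can
                    // report the target's real address size
  unsigned GetAddressByteSize() const { return 4; } // e.g. a 32-bit target
  void GetData(Extractor &out) const {
    out.SetData(m_data);
    // ...so it must be corrected afterwards, as the patch does:
    out.addr_byte_size = GetAddressByteSize();
  }
};
```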
+ data.SetAddressByteSize(GetAddressByteSize()); + return ret; } size_t ObjectFile::CopyData(lldb::offset_t offset, size_t length, diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp index 7828ca613359d..b77c011f8cb8b 100644 --- a/lldb/source/Symbol/SymbolContext.cpp +++ b/lldb/source/Symbol/SymbolContext.cpp @@ -315,14 +315,14 @@ void SymbolContext::Dump(Stream *s, Target *target) const { s->Indent(); *s << "CompileUnit = " << comp_unit; if (comp_unit != nullptr) - *s << " {0x" << comp_unit->GetID() << "} " - << *(static_cast(comp_unit)); + s->Format(" {{{0:x-16}} {1}", comp_unit->GetID(), + comp_unit->GetPrimaryFile()); s->EOL(); s->Indent(); *s << "Function = " << function; if (function != nullptr) { - *s << " {0x" << function->GetID() << "} " << function->GetType()->GetName() - << ", address-range = "; + s->Format(" {{{0:x-16}} {1}, address-range = ", function->GetID(), + function->GetType()->GetName()); function->GetAddressRange().Dump(s, target, Address::DumpStyleLoadAddress, Address::DumpStyleModuleWithFileAddress); s->EOL(); @@ -337,10 +337,7 @@ void SymbolContext::Dump(Stream *s, Target *target) const { s->Indent(); *s << "Block = " << block; if (block != nullptr) - *s << " {0x" << block->GetID() << '}'; - // Dump the block and pass it a negative depth to we print all the parent - // blocks if (block != NULL) - // block->Dump(s, function->GetFileAddress(), INT_MIN); + s->Format(" {{{0:x-16}}", block->GetID()); s->EOL(); s->Indent(); *s << "LineEntry = "; @@ -354,7 +351,8 @@ void SymbolContext::Dump(Stream *s, Target *target) const { s->EOL(); *s << "Variable = " << variable; if (variable != nullptr) { - *s << " {0x" << variable->GetID() << "} " << variable->GetType()->GetName(); + s->Format(" {{{0:x-16}} {1}", variable->GetID(), + variable->GetType()->GetName()); s->EOL(); } s->IndentLess(); @@ -1028,8 +1026,7 @@ bool SymbolContextSpecifier::SymbolContextMatches(SymbolContext &sc) { return false; } else { FileSpec module_file_spec(m_module_spec); - if (!FileSpec::Equal(module_file_spec, sc.module_sp->GetFileSpec(), - false)) + if (!FileSpec::Match(module_file_spec, sc.module_sp->GetFileSpec())) return false; } } @@ -1048,8 +1045,8 @@ bool SymbolContextSpecifier::SymbolContextMatches(SymbolContext &sc) { sc.block->GetInlinedFunctionInfo(); if (inline_info != nullptr) { was_inlined = true; - if (!FileSpec::Equal(inline_info->GetDeclaration().GetFile(), - *(m_file_spec_up.get()), false)) + if (!FileSpec::Match(*m_file_spec_up, + inline_info->GetDeclaration().GetFile())) return false; } } @@ -1057,7 +1054,7 @@ bool SymbolContextSpecifier::SymbolContextMatches(SymbolContext &sc) { // Next check the comp unit, but only if the SymbolContext was not // inlined. if (!was_inlined && sc.comp_unit != nullptr) { - if (!FileSpec::Equal(*(sc.comp_unit), *(m_file_spec_up.get()), false)) + if (!FileSpec::Match(*m_file_spec_up, sc.comp_unit->GetPrimaryFile())) return false; } } diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 9a2b5cddd73b7..c7a6bf2145267 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -13,7 +13,6 @@ #include "lldb/Core/Module.h" #include "lldb/Core/RichManglingContext.h" -#include "lldb/Core/STLUtils.h" #include "lldb/Core/Section.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Symbol/Symbol.h" @@ -107,10 +106,8 @@ void Symtab::Dump(Stream *s, Target *target, SortOrder sort_order, // sorted by name. So we must make the ordered symbol list up ourselves. 
s->PutCString(" (sorted by name):\n"); DumpSymbolHeader(s); - typedef std::multimap - CStringToSymbol; - CStringToSymbol name_map; + + std::multimap name_map; for (const_iterator pos = m_symbols.begin(), end = m_symbols.end(); pos != end; ++pos) { const char *name = pos->GetName().AsCString(); @@ -118,12 +115,10 @@ void Symtab::Dump(Stream *s, Target *target, SortOrder sort_order, name_map.insert(std::make_pair(name, &(*pos))); } - for (CStringToSymbol::const_iterator pos = name_map.begin(), - end = name_map.end(); - pos != end; ++pos) { + for (const auto &name_to_symbol : name_map) { + const Symbol *symbol = name_to_symbol.second; s->Indent(); - pos->second->Dump(s, target, pos->second - &m_symbols[0], - name_preference); + symbol->Dump(s, target, symbol - &m_symbols[0], name_preference); } } break; diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp index c3e5c03709517..6465ce3dd156f 100644 --- a/lldb/source/Symbol/Type.cpp +++ b/lldb/source/Symbol/Type.cpp @@ -255,7 +255,7 @@ void Type::Dump(Stream *s, bool show_context) { *s << ", compiler_type = " << m_compiler_type.GetOpaqueQualType() << ' '; GetForwardCompilerType().DumpTypeDescription(s); } else if (m_encoding_uid != LLDB_INVALID_UID) { - *s << ", type_data = " << (uint64_t)m_encoding_uid; + s->Format(", type_data = {0:x-16}", m_encoding_uid); switch (m_encoding_uid_type) { case eEncodingInvalid: break; diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp index c63f24aea3354..4e746bd18e1f3 100644 --- a/lldb/source/Symbol/TypeSystem.cpp +++ b/lldb/source/Symbol/TypeSystem.cpp @@ -43,8 +43,6 @@ size_t LanguageSet::Size() const { return bitvector.count(); } bool LanguageSet::Empty() const { return bitvector.none(); } bool LanguageSet::operator[](unsigned i) const { return bitvector[i]; } -TypeSystem::TypeSystem(LLVMCastKind kind) : m_kind(kind), m_sym_file(nullptr) {} - TypeSystem::~TypeSystem() {} static lldb::TypeSystemSP CreateInstanceHelper(lldb::LanguageType language, diff --git a/lldb/source/Symbol/Variable.cpp b/lldb/source/Symbol/Variable.cpp index 427dbf459c4eb..fc7d127a326fa 100644 --- a/lldb/source/Symbol/Variable.cpp +++ b/lldb/source/Symbol/Variable.cpp @@ -112,7 +112,7 @@ void Variable::Dump(Stream *s, bool show_context) const { if (m_symfile_type_sp) { Type *type = m_symfile_type_sp->GetType(); if (type) { - *s << ", type = {" << type->GetID() << "} " << (void *)type << " ("; + s->Format(", type = {{{0:x-16}} {1} (", type->GetID(), type); type->DumpTypeName(s); s->PutChar(')'); } @@ -134,7 +134,7 @@ void Variable::Dump(Stream *s, bool show_context) const { s->PutCString("thread local"); break; default: - *s << "??? (" << m_scope << ')'; + s->AsRawOstream() << "??? 
(" << m_scope << ')'; } } @@ -487,13 +487,6 @@ static void PrivateAutoComplete( &prefix_path, // Anything that has been resolved already will be in here const CompilerType &compiler_type, CompletionRequest &request); -static void PrivateAutoCompleteMembers( - StackFrame *frame, const std::string &partial_member_name, - llvm::StringRef partial_path, - const llvm::Twine - &prefix_path, // Anything that has been resolved already will be in here - const CompilerType &compiler_type, CompletionRequest &request); - static void PrivateAutoCompleteMembers( StackFrame *frame, const std::string &partial_member_name, llvm::StringRef partial_path, diff --git a/lldb/source/Target/ABI.cpp b/lldb/source/Target/ABI.cpp index 005261e0ddee0..6217ee2ed9ced 100644 --- a/lldb/source/Target/ABI.cpp +++ b/lldb/source/Target/ABI.cpp @@ -63,24 +63,6 @@ bool ABI::GetRegisterInfoByName(ConstString name, RegisterInfo &info) { return false; } -bool ABI::GetRegisterInfoByKind(RegisterKind reg_kind, uint32_t reg_num, - RegisterInfo &info) { - if (reg_kind < eRegisterKindEHFrame || reg_kind >= kNumRegisterKinds) - return false; - - uint32_t count = 0; - const RegisterInfo *register_info_array = GetRegisterInfoArray(count); - if (register_info_array) { - for (uint32_t i = 0; i < count; ++i) { - if (register_info_array[i].kinds[reg_kind] == reg_num) { - info = register_info_array[i]; - return true; - } - } - } - return false; -} - ValueObjectSP ABI::GetReturnValueObject(Thread &thread, CompilerType &ast_type, bool persistent) const { if (!ast_type.IsValid()) @@ -229,3 +211,20 @@ std::unique_ptr ABI::MakeMCRegisterInfo(const ArchSpec &ar assert(info_up); return info_up; } + +void ABI::AugmentRegisterInfo(RegisterInfo &info) { + if (info.kinds[eRegisterKindEHFrame] != LLDB_INVALID_REGNUM && + info.kinds[eRegisterKindDWARF] != LLDB_INVALID_REGNUM) + return; + + RegisterInfo abi_info; + if (!GetRegisterInfoByName(ConstString(info.name), abi_info)) + return; + + if (info.kinds[eRegisterKindEHFrame] == LLDB_INVALID_REGNUM) + info.kinds[eRegisterKindEHFrame] = abi_info.kinds[eRegisterKindEHFrame]; + if (info.kinds[eRegisterKindDWARF] == LLDB_INVALID_REGNUM) + info.kinds[eRegisterKindDWARF] = abi_info.kinds[eRegisterKindDWARF]; + if (info.kinds[eRegisterKindGeneric] == LLDB_INVALID_REGNUM) + info.kinds[eRegisterKindGeneric] = abi_info.kinds[eRegisterKindGeneric]; +} diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index c9849a9e5f09f..aaf48f35f921d 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -406,7 +406,7 @@ void Platform::GetStatus(Stream &strm) { if (arch.IsValid()) { if (!arch.GetTriple().str().empty()) { strm.Printf(" Triple: "); - arch.DumpTriple(strm); + arch.DumpTriple(strm.AsRawOstream()); strm.EOL(); } } diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index ed0b951fbce1a..a731a353c1bc1 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -1486,8 +1486,7 @@ const lldb::ABISP &Process::GetABI() { return m_abi_sp; } -std::vector -Process::GetLanguageRuntimes(bool retry_if_null) { +std::vector Process::GetLanguageRuntimes() { std::vector language_runtimes; if (m_finalizing) @@ -1500,15 +1499,14 @@ Process::GetLanguageRuntimes(bool retry_if_null) { // yet or the proper condition for loading wasn't yet met (e.g. libc++.so // hadn't been loaded). 
for (const lldb::LanguageType lang_type : Language::GetSupportedLanguages()) { - if (LanguageRuntime *runtime = GetLanguageRuntime(lang_type, retry_if_null)) + if (LanguageRuntime *runtime = GetLanguageRuntime(lang_type)) language_runtimes.emplace_back(runtime); } return language_runtimes; } -LanguageRuntime *Process::GetLanguageRuntime(lldb::LanguageType language, - bool retry_if_null) { +LanguageRuntime *Process::GetLanguageRuntime(lldb::LanguageType language) { if (m_finalizing) return nullptr; @@ -1517,7 +1515,7 @@ LanguageRuntime *Process::GetLanguageRuntime(lldb::LanguageType language, std::lock_guard guard(m_language_runtimes_mutex); LanguageRuntimeCollection::iterator pos; pos = m_language_runtimes.find(language); - if (pos == m_language_runtimes.end() || (retry_if_null && !pos->second)) { + if (pos == m_language_runtimes.end() || !pos->second) { lldb::LanguageRuntimeSP runtime_sp( LanguageRuntime::FindPlugin(this, language)); diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 4b9a1b77ad16d..59f72141ee5fc 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -404,8 +404,8 @@ Target::CreateAddressInModuleBreakpoint(lldb::addr_t file_addr, bool internal, bool request_hardware) { SearchFilterSP filter_sp( new SearchFilterForUnconstrainedSearches(shared_from_this())); - BreakpointResolverSP resolver_sp( - new BreakpointResolverAddress(nullptr, file_addr, file_spec)); + BreakpointResolverSP resolver_sp(new BreakpointResolverAddress( + nullptr, file_addr, file_spec ? *file_spec : FileSpec())); return CreateBreakpoint(filter_sp, resolver_sp, internal, request_hardware, false); } @@ -728,11 +728,17 @@ void Target::ConfigureBreakpointName( } void Target::ApplyNameToBreakpoints(BreakpointName &bp_name) { - BreakpointList bkpts_with_name(false); - m_breakpoint_list.FindBreakpointsByName(bp_name.GetName().AsCString(), - bkpts_with_name); + llvm::Expected> expected_vector = + m_breakpoint_list.FindBreakpointsByName(bp_name.GetName().AsCString()); + + if (!expected_vector) { + LLDB_LOG(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_BREAKPOINTS), + "invalid breakpoint name: {}", + llvm::toString(expected_vector.takeError())); + return; + } - for (auto bp_sp : bkpts_with_name.Breakpoints()) + for (auto bp_sp : *expected_vector) bp_name.ConfigureBreakpoint(bp_sp); } @@ -1425,8 +1431,7 @@ void Target::SetExecutableModule(ModuleSP &executable_sp, ModuleList added_modules; executable_objfile->GetDependentModules(dependent_files); for (uint32_t i = 0; i < dependent_files.GetSize(); i++) { - FileSpec dependent_file_spec( - dependent_files.GetFileSpecPointerAtIndex(i)); + FileSpec dependent_file_spec(dependent_files.GetFileSpecAtIndex(i)); FileSpec platform_dependent_file_spec; if (m_platform_sp) m_platform_sp->GetFileWithUUID(dependent_file_spec, nullptr, @@ -3177,7 +3182,7 @@ void Target::StopHook::SetThreadSpecifier(ThreadSpec *specifier) { void Target::StopHook::GetDescription(Stream *s, lldb::DescriptionLevel level) const { - int indent_level = s->GetIndentLevel(); + unsigned indent_level = s->GetIndentLevel(); s->SetIndentLevel(indent_level + 2); @@ -4094,7 +4099,7 @@ void Target::TargetEventData::Dump(Stream *s) const { if (i != 0) *s << ", "; m_module_list.GetModuleAtIndex(i)->GetDescription( - s, lldb::eDescriptionLevelBrief); + s->AsRawOstream(), lldb::eDescriptionLevelBrief); } } diff --git a/lldb/source/Target/TargetList.cpp b/lldb/source/Target/TargetList.cpp index 7c7a36e97bbfe..1b4db0c2aba59 100644 --- 
a/lldb/source/Target/TargetList.cpp +++ b/lldb/source/Target/TargetList.cpp @@ -144,9 +144,9 @@ Status TargetList::CreateTargetInternal( StreamString platform_arch_strm; StreamString module_arch_strm; - platform_arch.DumpTriple(platform_arch_strm); + platform_arch.DumpTriple(platform_arch_strm.AsRawOstream()); matching_module_spec.GetArchitecture().DumpTriple( - module_arch_strm); + module_arch_strm.AsRawOstream()); error.SetErrorStringWithFormat( "the specified architecture '%s' is not compatible with '%s' " "in '%s'", @@ -457,15 +457,12 @@ TargetSP TargetList::FindTargetWithExecutableAndArchitecture( const FileSpec &exe_file_spec, const ArchSpec *exe_arch_ptr) const { std::lock_guard guard(m_target_list_mutex); TargetSP target_sp; - bool full_match = (bool)exe_file_spec.GetDirectory(); - collection::const_iterator pos, end = m_target_list.end(); for (pos = m_target_list.begin(); pos != end; ++pos) { Module *exe_module = (*pos)->GetExecutableModulePointer(); if (exe_module) { - if (FileSpec::Equal(exe_file_spec, exe_module->GetFileSpec(), - full_match)) { + if (FileSpec::Match(exe_file_spec, exe_module->GetFileSpec())) { if (exe_arch_ptr) { if (!exe_arch_ptr->IsCompatibleMatch(exe_module->GetArchitecture())) continue; diff --git a/lldb/source/Target/ThreadPlanStepInRange.cpp b/lldb/source/Target/ThreadPlanStepInRange.cpp index 77772aed516bd..fdb2782bc5182 100644 --- a/lldb/source/Target/ThreadPlanStepInRange.cpp +++ b/lldb/source/Target/ThreadPlanStepInRange.cpp @@ -339,7 +339,7 @@ bool ThreadPlanStepInRange::FrameMatchesAvoidCriteria() { if (frame_library) { for (size_t i = 0; i < num_libraries; i++) { const FileSpec &file_spec(libraries_to_avoid.GetFileSpecAtIndex(i)); - if (FileSpec::Equal(file_spec, frame_library, false)) { + if (FileSpec::Match(file_spec, frame_library)) { libraries_say_avoid = true; break; } diff --git a/lldb/source/Utility/ArchSpec.cpp b/lldb/source/Utility/ArchSpec.cpp index 62d9d246255a1..bbfa5cf61d014 100644 --- a/lldb/source/Utility/ArchSpec.cpp +++ b/lldb/source/Utility/ArchSpec.cpp @@ -868,7 +868,7 @@ void ArchSpec::MergeFrom(const ArchSpec &other) { IsCompatibleMatch(other) && GetCore() == ArchSpec::eCore_arm_generic && other.GetCore() != ArchSpec::eCore_arm_generic) { m_core = other.GetCore(); - CoreUpdated(true); + CoreUpdated(false); } if (GetFlags() == 0) { SetFlags(other.GetFlags()); @@ -1443,21 +1443,24 @@ bool ArchSpec::IsAlwaysThumbInstructions() const { GetCore() == ArchSpec::Core::eCore_thumbv6m) { return true; } + // Windows on ARM is always thumb. + if (GetTriple().isOSWindows()) + return true; } return false; } -void ArchSpec::DumpTriple(Stream &s) const { +void ArchSpec::DumpTriple(llvm::raw_ostream &s) const { const llvm::Triple &triple = GetTriple(); llvm::StringRef arch_str = triple.getArchName(); llvm::StringRef vendor_str = triple.getVendorName(); llvm::StringRef os_str = triple.getOSName(); llvm::StringRef environ_str = triple.getEnvironmentName(); - s.Printf("%s-%s-%s", arch_str.empty() ? "*" : arch_str.str().c_str(), - vendor_str.empty() ? "*" : vendor_str.str().c_str(), - os_str.empty() ? "*" : os_str.str().c_str()); + s << llvm::formatv("{0}-{1}-{2}", arch_str.empty() ? "*" : arch_str, + vendor_str.empty() ? "*" : vendor_str, + os_str.empty() ? 
"*" : os_str); if (!environ_str.empty()) - s.Printf("-%s", environ_str.str().c_str()); + s << "-" << environ_str; } diff --git a/lldb/source/Utility/Baton.cpp b/lldb/source/Utility/Baton.cpp index 84e295e246864..7bba10dcec962 100644 --- a/lldb/source/Utility/Baton.cpp +++ b/lldb/source/Utility/Baton.cpp @@ -8,5 +8,6 @@ #include "lldb/Utility/Baton.h" -void lldb_private::UntypedBaton::GetDescription( - Stream *s, lldb::DescriptionLevel level) const {} +void lldb_private::UntypedBaton::GetDescription(llvm::raw_ostream &s, + lldb::DescriptionLevel level, + unsigned indentation) const {} diff --git a/lldb/source/Utility/FileSpec.cpp b/lldb/source/Utility/FileSpec.cpp index 88966843072b6..a9e542991f179 100644 --- a/lldb/source/Utility/FileSpec.cpp +++ b/lldb/source/Utility/FileSpec.cpp @@ -75,15 +75,6 @@ FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) { FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple) : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {} -// Copy constructor -FileSpec::FileSpec(const FileSpec *rhs) : m_directory(), m_filename() { - if (rhs) - *this = *rhs; -} - -// Virtual destructor in case anyone inherits from this class. -FileSpec::~FileSpec() {} - namespace { /// Safely get a character at the specified index. /// @@ -302,20 +293,18 @@ int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) { } bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) { - // case sensitivity of equality test - const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive(); + if (full || (a.GetDirectory() && b.GetDirectory())) + return a == b; - const bool filenames_equal = ConstString::Equals(a.m_filename, - b.m_filename, - case_sensitive); - - if (!filenames_equal) - return false; - - if (!full && (a.GetDirectory().IsEmpty() || b.GetDirectory().IsEmpty())) - return filenames_equal; + return a.FileEquals(b); +} - return a == b; +bool FileSpec::Match(const FileSpec &pattern, const FileSpec &file) { + if (pattern.GetDirectory()) + return pattern == file; + if (pattern.GetFilename()) + return pattern.FileEquals(file); + return true; } llvm::Optional FileSpec::GuessPathStyle(llvm::StringRef absolute_path) { diff --git a/lldb/source/Utility/ProcessInfo.cpp b/lldb/source/Utility/ProcessInfo.cpp index 5743d223be4fa..a02ee1af867a0 100644 --- a/lldb/source/Utility/ProcessInfo.cpp +++ b/lldb/source/Utility/ProcessInfo.cpp @@ -49,7 +49,7 @@ llvm::StringRef ProcessInfo::GetNameAsStringRef() const { void ProcessInfo::Dump(Stream &s, Platform *platform) const { s << "Executable: " << GetName() << "\n"; s << "Triple: "; - m_arch.DumpTriple(s); + m_arch.DumpTriple(s.AsRawOstream()); s << "\n"; s << "Arguments:\n"; @@ -137,7 +137,7 @@ void ProcessInstanceInfo::Dump(Stream &s, UserIDResolver &resolver) const { if (m_arch.IsValid()) { s.Printf(" arch = "); - m_arch.DumpTriple(s); + m_arch.DumpTriple(s.AsRawOstream()); s.EOL(); } @@ -189,7 +189,7 @@ void ProcessInstanceInfo::DumpAsTableRow(Stream &s, UserIDResolver &resolver, StreamString arch_strm; if (m_arch.IsValid()) - m_arch.DumpTriple(arch_strm); + m_arch.DumpTriple(arch_strm.AsRawOstream()); auto print = [&](bool (ProcessInstanceInfo::*isValid)() const, uint32_t (ProcessInstanceInfo::*getID)() const, diff --git a/lldb/source/Utility/Reproducer.cpp b/lldb/source/Utility/Reproducer.cpp index e0806f5f5981d..8a28e9b13675a 100644 --- a/lldb/source/Utility/Reproducer.cpp +++ b/lldb/source/Utility/Reproducer.cpp @@ -25,6 +25,16 @@ llvm::Error 
Reproducer::Initialize(ReproducerMode mode, lldbassert(!InstanceImpl() && "Already initialized."); InstanceImpl().emplace(); + // The environment can override the capture mode. + if (mode != ReproducerMode::Replay) { + std::string env = + llvm::StringRef(getenv("LLDB_CAPTURE_REPRODUCER")).lower(); + if (env == "0" || env == "off") + mode = ReproducerMode::Off; + else if (env == "1" || env == "on") + mode = ReproducerMode::Capture; + } + switch (mode) { case ReproducerMode::Capture: { if (!root) { diff --git a/lldb/source/Utility/Status.cpp b/lldb/source/Utility/Status.cpp index 3b5094d64b75a..b74db72773dd4 100644 --- a/lldb/source/Utility/Status.cpp +++ b/lldb/source/Utility/Status.cpp @@ -100,14 +100,23 @@ static std::string RetrieveWin32ErrorString(uint32_t error_code) { char *buffer = nullptr; std::string message; // Retrieve win32 system error. + // First, attempt to load an en-US message if (::FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_MAX_WIDTH_MASK, - NULL, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + NULL, error_code, MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), (LPSTR)&buffer, 0, NULL)) { message.assign(buffer); ::LocalFree(buffer); } + // If the previous attempt didn't work, use the default OS language + else if (::FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, error_code, 0, (LPSTR)&buffer, 0, NULL)) { + message.assign(buffer); + ::LocalFree(buffer); + } return message; } #endif diff --git a/lldb/source/Utility/Stream.cpp b/lldb/source/Utility/Stream.cpp index c48a12acd9064..2ef4cd78ab034 100644 --- a/lldb/source/Utility/Stream.cpp +++ b/lldb/source/Utility/Stream.cpp @@ -160,65 +160,19 @@ Stream &Stream::operator<<(const void *p) { return *this; } -// Stream a uint8_t "uval" out to this stream. -Stream &Stream::operator<<(uint8_t uval) { - PutHex8(uval); - return *this; -} - -// Stream a uint16_t "uval" out to this stream. -Stream &Stream::operator<<(uint16_t uval) { - PutHex16(uval, m_byte_order); - return *this; -} - -// Stream a uint32_t "uval" out to this stream. -Stream &Stream::operator<<(uint32_t uval) { - PutHex32(uval, m_byte_order); - return *this; -} - -// Stream a uint64_t "uval" out to this stream. -Stream &Stream::operator<<(uint64_t uval) { - PutHex64(uval, m_byte_order); - return *this; -} - -// Stream a int8_t "sval" out to this stream. -Stream &Stream::operator<<(int8_t sval) { - Printf("%i", static_cast<int>(sval)); - return *this; -} - -// Stream a int16_t "sval" out to this stream. -Stream &Stream::operator<<(int16_t sval) { - Printf("%i", static_cast<int>(sval)); - return *this; -} - -// Stream a int32_t "sval" out to this stream. -Stream &Stream::operator<<(int32_t sval) { - Printf("%i", static_cast<int>(sval)); - return *this; -} - -// Stream a int64_t "sval" out to this stream.
-Stream &Stream::operator<<(int64_t sval) { - Printf("%" PRIi64, sval); - return *this; -} - // Get the current indentation level -int Stream::GetIndentLevel() const { return m_indent_level; } +unsigned Stream::GetIndentLevel() const { return m_indent_level; } // Set the current indentation level -void Stream::SetIndentLevel(int indent_level) { m_indent_level = indent_level; } +void Stream::SetIndentLevel(unsigned indent_level) { + m_indent_level = indent_level; +} // Increment the current indentation level -void Stream::IndentMore(int amount) { m_indent_level += amount; } +void Stream::IndentMore(unsigned amount) { m_indent_level += amount; } // Decrement the current indentation level -void Stream::IndentLess(int amount) { +void Stream::IndentLess(unsigned amount) { if (m_indent_level >= amount) m_indent_level -= amount; else diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index 06125a1aaeddb..9b1c3c12f172d 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -54,6 +54,11 @@ def find_shlibpath_var(): lit_config.warning("unable to inject shared library path on '{}'".format( platform.system())) +# Propagate LLDB_CAPTURE_REPRODUCER +if 'LLDB_CAPTURE_REPRODUCER' in os.environ: + config.environment['LLDB_CAPTURE_REPRODUCER'] = os.environ[ + 'LLDB_CAPTURE_REPRODUCER'] + # Clean the module caches in the test build directory. This is necessary in an # incremental build whenever clang changes underneath, so doing it once per # lit.py invocation is close enough. diff --git a/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.dmp.yaml b/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.dmp.yaml new file mode 100644 index 0000000000000..330a761d88b4c --- /dev/null +++ b/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.dmp.yaml @@ -0,0 +1,37 @@ +--- !minidump +Version: 0xA0BAA793 +Flags: 0x0000000000000800 +Streams: + - Type: ThreadList + Threads: + - Thread Id: 0x00004034 + Suspend Count: 0x00000001 + Priority Class: 0x00000020 + Environment Block: 0x00000000007E6000 + Context: 0000000000000000 + Stack: + Start of Memory Range: 0x00000000008FF758 + Content: 00000000000000 + - Type: ModuleList + Modules: + - Base of Image: 0x0000000000C70000 + Size of Image: 0x00002000 + Time Date Stamp: 1574942531 + Module Name: 'arm-fp-unwind.exe' + CodeView Record: '' + Reserved0: 0x0000000000008140 + - Type: SystemInfo + Processor Arch: ARM + Processor Level: 2049 + Processor Revision: 2564 + Number of Processors: 8 + Product type: 1 + Major Version: 10 + Build Number: 18362 + Platform ID: Win32NT + Suite Mask: 0x0100 + CPU: + CPUID: 0xEB8C1004 + - Type: MiscInfo + Content: 
54050000F7010000183800002EB9DF5D00000000000000006C0700002B0100006C0700000400000003000000002000000D000000000000000100000088FFFFFF46004C00450020005300740061006E0064006100720064002000540069006D00650000000000000000000000000000000000000000000000000000000000000000000A000000050004000000000000000000000046004C00450020004400610079006C0069006700680074002000540069006D00650000000000000000000000000000000000000000000000000000000000000000000300000005000300000000000000C4FFFFFF310038003300360032002E003200330039002E00610072006D006600720065002E0031003900680031005F00720065006C0065006100730065005F007300760063005F00700072006F00640031002E003100390030003600320038002D0031003600340031000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000064006200670063006F00720065002E0077006F0061002C00310030002E0030002E00310038003300360032002E003100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +... 
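The MiscInfo stream above is an opaque hex blob; the binary inputs for this test are never checked in directly but are regenerated from these YAML descriptions by yaml2obj. As a rough, untested sketch of that conversion step outside of lit (the helper name and paths are illustrative; it only assumes yaml2obj from the LLVM build tree is on PATH and mirrors the `yaml2obj ... > ...` RUN lines of the test below):

import subprocess

# Illustrative helper: regenerate the binary minidump and executable from
# their YAML descriptions, as the RUN lines in arm-fp-unwind.test do.
def rebuild_test_inputs(out_dir="."):
    for name in ("arm-fp-unwind.exe", "arm-fp-unwind.dmp"):
        with open("{}/{}".format(out_dir, name), "wb") as out:
            # yaml2obj writes the converted binary to stdout by default.
            subprocess.run(["yaml2obj", "Inputs/{}.yaml".format(name)],
                           stdout=out, check=True)

Keeping the inputs as YAML keeps them diffable and reviewable; the binary artifacts themselves never enter the tree.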
diff --git a/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.exe.yaml b/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.exe.yaml new file mode 100644 index 0000000000000..f3229060635f2 --- /dev/null +++ b/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.exe.yaml @@ -0,0 +1,92 @@ +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 4097 + ImageBase: 4194304 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [ IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE, IMAGE_DLL_CHARACTERISTICS_NX_COMPAT, IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 + ExportTable: + RelativeVirtualAddress: 0 + Size: 0 + ImportTable: + RelativeVirtualAddress: 0 + Size: 0 + ResourceTable: + RelativeVirtualAddress: 0 + Size: 0 + ExceptionTable: + RelativeVirtualAddress: 0 + Size: 0 + CertificateTable: + RelativeVirtualAddress: 0 + Size: 0 + BaseRelocationTable: + RelativeVirtualAddress: 0 + Size: 0 + Debug: + RelativeVirtualAddress: 0 + Size: 0 + Architecture: + RelativeVirtualAddress: 0 + Size: 0 + GlobalPtr: + RelativeVirtualAddress: 0 + Size: 0 + TlsTable: + RelativeVirtualAddress: 0 + Size: 0 + LoadConfigTable: + RelativeVirtualAddress: 0 + Size: 0 + BoundImport: + RelativeVirtualAddress: 0 + Size: 0 + IAT: + RelativeVirtualAddress: 0 + Size: 0 + DelayImportDescriptor: + RelativeVirtualAddress: 0 + Size: 0 + ClrRuntimeHeader: + RelativeVirtualAddress: 0 + Size: 0 +header: + Machine: IMAGE_FILE_MACHINE_ARMNT + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_32BIT_MACHINE ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + VirtualAddress: 4096 + VirtualSize: 38 + SectionData: 2DE90048EB46ADF5007D684600F004F80DF5007DBDE8008800BE01784278415C805C08447047 +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: entry + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: other + Value: 24 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... diff --git a/lldb/test/Shell/Minidump/Windows/arm-fp-unwind.test b/lldb/test/Shell/Minidump/Windows/arm-fp-unwind.test new file mode 100644 index 0000000000000..35ea7c8a9de0e --- /dev/null +++ b/lldb/test/Shell/Minidump/Windows/arm-fp-unwind.test @@ -0,0 +1,17 @@ +Test that unwind plans use the frame pointer register correctly. + +REQUIRES: arm + +RUN: yaml2obj %S/Inputs/arm-fp-unwind.exe.yaml > %T/arm-fp-unwind.exe +RUN: yaml2obj %S/Inputs/arm-fp-unwind.dmp.yaml > %T/arm-fp-unwind.dmp +RUN: %lldb -O "settings set target.exec-search-paths %T" \ +RUN: -c %T/arm-fp-unwind.dmp -o "image show-unwind -a 0x00c71010" -b \ +RUN: | FileCheck %s + +CHECK: Assembly language inspection UnwindPlan: +CHECK-NEXT: This UnwindPlan originally sourced from EmulateInstructionARM +CHECK-NEXT: This UnwindPlan is sourced from the compiler: no. +CHECK-NEXT: This UnwindPlan is valid at all instruction locations: yes. 
+CHECK-NEXT: row[0]: 0: CFA=sp +0 => +CHECK-NEXT: row[1]: 4: CFA=sp +8 => fp=[CFA-8] lr=[CFA-4] +CHECK-NEXT: row[2]: 6: CFA=fp +8 => fp=[CFA-8] lr=[CFA-4] diff --git a/lldb/test/Shell/ObjectFile/ELF/build-id-case.yaml b/lldb/test/Shell/ObjectFile/ELF/build-id-case.yaml index f9786b3754f84..08366056947bf 100644 --- a/lldb/test/Shell/ObjectFile/ELF/build-id-case.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/build-id-case.yaml @@ -4,8 +4,25 @@ # RUN: llvm-objcopy --strip-all %t/.build-id/1b/8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug %t/stripped.out # RUN: lldb-test object-file %t/stripped.out | FileCheck %s +# CHECK: Name: .debug_abbrev +# CHECK: Name: .debug_addr +# CHECK: Name: .debug_aranges # CHECK: Name: .debug_frame -# CHECK-NEXT: Type: dwarf-frame +# CHECK: Name: .debug_info +# CHECK: Name: .debug_line +# CHECK: Name: .debug_line_str +# CHECK: Name: .debug_loc +# CHECK: Name: .debug_loclists +# CHECK: Name: .debug_macinfo +# CHECK: Name: .debug_macro +# CHECK: Name: .debug_names +# CHECK: Name: .debug_pubnames +# CHECK: Name: .debug_pubtypes +# CHECK: Name: .debug_ranges +# CHECK: Name: .debug_rnglists +# CHECK: Name: .debug_str +# CHECK: Name: .debug_str_offsets +# CHECK: Name: .debug_types --- !ELF FileHeader: @@ -27,9 +44,62 @@ Sections: Address: 0x00000000004003D0 AddressAlign: 0x0000000000000010 Content: DEADBEEFBAADF00D + - Name: .debug_abbrev + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_addr + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_aranges + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D - Name: .debug_frame Type: SHT_PROGBITS - AddressAlign: 0x0000000000000008 + Content: DEADBEEFBAADF00D + - Name: .debug_info + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_line + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_line_str + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_loc + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_loclists + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_macinfo + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_macro + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_names + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_pubnames + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_pubtypes + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_ranges + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_rnglists + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_str + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_str_offsets + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_types + Type: SHT_PROGBITS Content: DEADBEEFBAADF00D Symbols: - Name: main diff --git a/lldb/test/Shell/ObjectFile/ELF/section-types.yaml b/lldb/test/Shell/ObjectFile/ELF/section-types.yaml index 9f6b4c0533b91..caac76a789ce0 100644 --- a/lldb/test/Shell/ObjectFile/ELF/section-types.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/section-types.yaml @@ -13,6 +13,12 @@ # CHECK-LABEL: Name: .debug_types.dwo # CHECK-NEXT: Type: dwarf-types-dwo +# CHECK-LABEL: Name: .debug_rnglists +# CHECK-NEXT: Type: dwarf-rnglists + +# CHECK-LABEL: Name: .debug_rnglists.dwo +# CHECK-NEXT: Type: dwarf-rnglists-dwo + # CHECK-LABEL: Name: .debug_names # CHECK-NEXT: Type: dwarf-names @@ -58,6 +64,14 @@ Sections: Type: SHT_PROGBITS AddressAlign: 0x0000000000000001 Content: DEADBEEFBAADF00D + - Name: .debug_rnglists + Type: SHT_PROGBITS + AddressAlign: 
0x0000000000000001 + Content: DEADBEEFBAADF00D + - Name: .debug_rnglists.dwo + Type: SHT_PROGBITS + AddressAlign: 0x0000000000000001 + Content: DEADBEEFBAADF00D - Name: .debug_names Type: SHT_PROGBITS AddressAlign: 0x0000000000000001 diff --git a/lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml b/lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml new file mode 100644 index 0000000000000..5515824e776bc --- /dev/null +++ b/lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml @@ -0,0 +1,94 @@ +# REQUIRES: arm + +# RUN: yaml2obj %s > %t.exe +# RUN: %lldb %t.exe -o "disassemble -b -n entry" -b | FileCheck %s + +# CHECK: {{.*}}.exe[0x401000] <+0>: 0x0040 lsls r0, r0, #0x1 +# CHECK: {{.*}}.exe[0x401002] <+2>: 0x4770 bx lr + +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 4097 + ImageBase: 4194304 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [ IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE, IMAGE_DLL_CHARACTERISTICS_NX_COMPAT, IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 + ExportTable: + RelativeVirtualAddress: 0 + Size: 0 + ImportTable: + RelativeVirtualAddress: 0 + Size: 0 + ResourceTable: + RelativeVirtualAddress: 0 + Size: 0 + ExceptionTable: + RelativeVirtualAddress: 0 + Size: 0 + CertificateTable: + RelativeVirtualAddress: 0 + Size: 0 + BaseRelocationTable: + RelativeVirtualAddress: 0 + Size: 0 + Debug: + RelativeVirtualAddress: 0 + Size: 0 + Architecture: + RelativeVirtualAddress: 0 + Size: 0 + GlobalPtr: + RelativeVirtualAddress: 0 + Size: 0 + TlsTable: + RelativeVirtualAddress: 0 + Size: 0 + LoadConfigTable: + RelativeVirtualAddress: 0 + Size: 0 + BoundImport: + RelativeVirtualAddress: 0 + Size: 0 + IAT: + RelativeVirtualAddress: 0 + Size: 0 + DelayImportDescriptor: + RelativeVirtualAddress: 0 + Size: 0 + ClrRuntimeHeader: + RelativeVirtualAddress: 0 + Size: 0 +header: + Machine: IMAGE_FILE_MACHINE_ARMNT + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_32BIT_MACHINE ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + VirtualAddress: 4096 + VirtualSize: 4 + SectionData: '40007047' +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: entry + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... 
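The test above hands a minimal PECOFF/ARMNT image to lldb and expects the two halfwords at `entry` to decode as Thumb (`lsls r0, r0, #0x1`, then `bx lr`), exercising the "Windows on ARM is always thumb" rule added to ArchSpec::IsAlwaysThumbInstructions earlier in this patch. Below is a standalone, untested sketch of the same RUN/CHECK pair, with FileCheck replaced by plain substring assertions; it assumes yaml2obj and lldb are on PATH, and the function name is hypothetical:

import subprocess

def check_thumb_disassembly(yaml_path, exe_path):
    # Build the PECOFF ARMNT image from its YAML description.
    with open(exe_path, "wb") as out:
        subprocess.run(["yaml2obj", yaml_path], stdout=out, check=True)
    # Batch-mode disassembly of `entry`, as in the RUN line above.
    result = subprocess.run(
        ["lldb", exe_path, "-o", "disassemble -b -n entry", "-b"],
        capture_output=True, text=True, check=True)
    # If lldb fell back to ARM mode, the two halfwords would be read as a
    # single 32-bit instruction and the expected mnemonics would not appear.
    assert "lsls" in result.stdout and "bx" in result.stdout
    return result.stdout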
diff --git a/lldb/test/Shell/ObjectFile/PECOFF/section-types.yaml b/lldb/test/Shell/ObjectFile/PECOFF/section-types.yaml new file mode 100644 index 0000000000000..caf955500e09f --- /dev/null +++ b/lldb/test/Shell/ObjectFile/PECOFF/section-types.yaml @@ -0,0 +1,92 @@ +# RUN: yaml2obj %s > %t +# RUN: lldb-test object-file %t | FileCheck %s + +# CHECK-LABEL: Name: .text +# CHECK-NEXT: Type: code + +# CHECK-LABEL: Name: .eh_fram +# CHECK-NEXT: Type: eh-frame +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 4096 + ImageBase: 4194304 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [ IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE, IMAGE_DLL_CHARACTERISTICS_NX_COMPAT, IMAGE_DLL_CHARACTERISTICS_NO_SEH, IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 + ExportTable: + RelativeVirtualAddress: 0 + Size: 0 + ImportTable: + RelativeVirtualAddress: 0 + Size: 0 + ResourceTable: + RelativeVirtualAddress: 0 + Size: 0 + ExceptionTable: + RelativeVirtualAddress: 0 + Size: 0 + CertificateTable: + RelativeVirtualAddress: 0 + Size: 0 + BaseRelocationTable: + RelativeVirtualAddress: 12288 + Size: 12 + Debug: + RelativeVirtualAddress: 0 + Size: 0 + Architecture: + RelativeVirtualAddress: 0 + Size: 0 + GlobalPtr: + RelativeVirtualAddress: 0 + Size: 0 + TlsTable: + RelativeVirtualAddress: 0 + Size: 0 + LoadConfigTable: + RelativeVirtualAddress: 0 + Size: 0 + BoundImport: + RelativeVirtualAddress: 0 + Size: 0 + IAT: + RelativeVirtualAddress: 0 + Size: 0 + DelayImportDescriptor: + RelativeVirtualAddress: 0 + Size: 0 + ClrRuntimeHeader: + RelativeVirtualAddress: 0 + Size: 0 +header: + Machine: IMAGE_FILE_MACHINE_I386 + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_32BIT_MACHINE ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + VirtualAddress: 4096 + VirtualSize: 5 + SectionData: 5589E55DC3 + - Name: .eh_fram + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + VirtualAddress: 8192 + VirtualSize: 52 + SectionData: 1400000000000000017A5200017C0801000C040488010000180000001C000000001040000500000000410E088502420D05000000 + - Name: .reloc + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + VirtualAddress: 12288 + VirtualSize: 12 + SectionData: 002000000C00000020300000 +symbols: +... diff --git a/lldb/test/Shell/Reproducer/TestCaptureEnvOverride.test b/lldb/test/Shell/Reproducer/TestCaptureEnvOverride.test new file mode 100644 index 0000000000000..a8e7bdec250e6 --- /dev/null +++ b/lldb/test/Shell/Reproducer/TestCaptureEnvOverride.test @@ -0,0 +1,20 @@ +# UNSUPPORTED: system-windows +# This tests the LLDB_CAPTURE_REPRODUCER override. 
+ +# RUN: %lldb -b -o 'reproducer status' --capture --capture-path %t.repro /bin/ls | FileCheck %s --check-prefix CAPTURE +# RUN: %lldb -b -o 'reproducer status' --capture | FileCheck %s --check-prefix CAPTURE + +# RUN: LLDB_CAPTURE_REPRODUCER=1 %lldb -b -o 'reproducer status' | FileCheck %s --check-prefix CAPTURE +# RUN: LLDB_CAPTURE_REPRODUCER=ON %lldb -b -o 'reproducer status' | FileCheck %s --check-prefix CAPTURE +# RUN: LLDB_CAPTURE_REPRODUCER=on %lldb -b -o 'reproducer status' | FileCheck %s --check-prefix CAPTURE + +# RUN: LLDB_CAPTURE_REPRODUCER=0 %lldb -b -o 'reproducer status' --capture --capture-path %t.repro | FileCheck %s --check-prefix OFF +# RUN: LLDB_CAPTURE_REPRODUCER=0 %lldb -b -o 'reproducer status' --capture | FileCheck %s --check-prefix OFF +# RUN: LLDB_CAPTURE_REPRODUCER=OFF %lldb -b -o 'reproducer status' --capture --capture-path %t.repro | FileCheck %s --check-prefix OFF +# RUN: LLDB_CAPTURE_REPRODUCER=off %lldb -b -o 'reproducer status' --capture | FileCheck %s --check-prefix OFF + +# RUN: LLDB_CAPTURE_REPRODUCER=bogus %lldb -b -o 'reproducer status' --capture | FileCheck %s --check-prefix CAPTURE +# RUN: LLDB_CAPTURE_REPRODUCER=bogus %lldb -b -o 'reproducer status' | FileCheck %s --check-prefix OFF + +# CAPTURE: Reproducer is in capture mode. +# OFF: Reproducer is off. diff --git a/lldb/test/Shell/Reproducer/TestVersionCheck.test b/lldb/test/Shell/Reproducer/TestVersionCheck.test new file mode 100644 index 0000000000000..e3fb60367cec2 --- /dev/null +++ b/lldb/test/Shell/Reproducer/TestVersionCheck.test @@ -0,0 +1,29 @@ +# REQUIRES: system-darwin + +# This tests the reproducer version check. + +# RUN: rm -rf %t.repro +# RUN: %clang_host %S/Inputs/simple.c -g -o %t.out +# RUN: %lldb -x -b -s %S/Inputs/FileCapture.in --capture --capture-path %t.repro %t.out | FileCheck %s --check-prefix CHECK --check-prefix CAPTURE + +# Make sure that replay works. +# RUN: %lldb --replay %t.repro | FileCheck %s --check-prefix CHECK --check-prefix REPLAY + +# Change the reproducer version. +# RUN: echo "bogus" >> %t.repro/version.txt + +# Make sure that replay fails after the version change. +# RUN: not %lldb --replay %t.repro 2>&1 | FileCheck %s --check-prefix ERROR + +# Make sure that we can circumvent the version check with -reproducer-skip-version-check. +# RUN: %lldb --replay %t.repro -reproducer-skip-version-check | FileCheck %s --check-prefix CHECK --check-prefix REPLAY + +# CAPTURE: testing +# REPLAY-NOT: testing + +# CHECK: Process {{.*}} exited + +# CAPTURE: Reproducer is in capture mode. +# CAPTURE: Reproducer written + +# ERROR: error: reproducer replay failed: reproducer capture and replay version don't match diff --git a/lldb/test/Shell/Reproducer/lit.local.cfg b/lldb/test/Shell/Reproducer/lit.local.cfg index 5659f1baa06df..dbb37b199d781 100644 --- a/lldb/test/Shell/Reproducer/lit.local.cfg +++ b/lldb/test/Shell/Reproducer/lit.local.cfg @@ -1,2 +1,6 @@ # Enable crash reports for the reproducer tests.
-del config.environment['LLVM_DISABLE_CRASH_REPORT'] +if 'LLVM_DISABLE_CRASH_REPORT' in config.environment: + del config.environment['LLVM_DISABLE_CRASH_REPORT'] + +if 'LLDB_CAPTURE_REPRODUCER' in config.environment: + del config.environment['LLDB_CAPTURE_REPRODUCER'] diff --git a/lldb/test/Shell/SymbolFile/DWARF/array-sizes.s b/lldb/test/Shell/SymbolFile/DWARF/array-sizes.s index f00fe2ad005d2..b810527b5535e 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/array-sizes.s +++ b/lldb/test/Shell/SymbolFile/DWARF/array-sizes.s @@ -10,7 +10,7 @@ # RUN: lldb-test symbols %t | FileCheck %s # CHECK: Variable{0x7fffffff0000001e}, name = "X" -# CHECK-SAME: type = {7fffffff00000033} 0x{{[0-9a-f]*}} (char [56]) +# CHECK-SAME: type = {7fffffff00000033} 0x{{[0-9A-F]*}} (char [56]) # Generated from "char X[47];" diff --git a/lldb/test/Shell/SymbolFile/DWARF/debug_ranges.s b/lldb/test/Shell/SymbolFile/DWARF/debug_ranges.s index bbe5cb220c2da..13eea1b80706e 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/debug_ranges.s +++ b/lldb/test/Shell/SymbolFile/DWARF/debug_ranges.s @@ -3,16 +3,13 @@ # RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t # RUN: %lldb %t -o "image lookup -v -s lookup_ranges" -o exit | FileCheck %s -# CHECK: Function: id = {0x7fffffff0000001c}, name = "ranges", range = [0x0000000000000000-0x0000000000000004) -# CHECK: Blocks: id = {0x7fffffff0000001c}, range = [0x00000000-0x00000004) -# CHECK-NEXT: id = {0x7fffffff0000002d}, ranges = [0x00000001-0x00000002)[0x00000003-0x00000004) +# CHECK: Function: id = {0x7fffffff0000002b}, name = "ranges", range = [0x0000000000000000-0x0000000000000004) +# CHECK: Blocks: id = {0x7fffffff0000002b}, range = [0x00000000-0x00000004) +# CHECK-NEXT: id = {0x7fffffff0000003f}, ranges = [0x00000001-0x00000002)[0x00000003-0x00000004) .text .p2align 12 - .globl ranges - .type ranges,@function -ranges: # @ranges -.Lfoo_begin: +ranges: nop .Lblock1_begin: lookup_ranges: @@ -22,21 +19,14 @@ lookup_ranges: .Lblock2_begin: nop .Lblock2_end: -.Lfunc_end0: - .size ranges, .Lfunc_end0-ranges - # -- End function - .section .debug_str,"MS",@progbits,1 -.Lproducer: - .asciz "Hand-written DWARF" -.Lranges: - .asciz "ranges" +.Lranges_end: .section .debug_abbrev,"",@progbits .byte 1 # Abbreviation Code .byte 17 # DW_TAG_compile_unit .byte 1 # DW_CHILDREN_yes .byte 37 # DW_AT_producer - .byte 14 # DW_FORM_strp + .byte 8 # DW_FORM_string .byte 17 # DW_AT_low_pc .byte 1 # DW_FORM_addr .byte 18 # DW_AT_high_pc @@ -51,7 +41,7 @@ lookup_ranges: .byte 18 # DW_AT_high_pc .byte 6 # DW_FORM_data4 .byte 3 # DW_AT_name - .byte 14 # DW_FORM_strp + .byte 8 # DW_FORM_string .byte 0 # EOM(1) .byte 0 # EOM(2) .byte 5 # Abbreviation Code @@ -71,13 +61,13 @@ lookup_ranges: .long .debug_abbrev # Offset Into Abbrev. 
Section .byte 8 # Address Size (in bytes) .byte 1 # Abbrev [1] 0xb:0x7b DW_TAG_compile_unit - .long .Lproducer # DW_AT_producer - .quad .Lfoo_begin # DW_AT_low_pc - .long .Lfunc_end0-.Lfoo_begin # DW_AT_high_pc + .asciz "Hand-written DWARF" # DW_AT_producer + .quad ranges # DW_AT_low_pc + .long .Lranges_end-ranges # DW_AT_high_pc .byte 2 # Abbrev [2] 0x2a:0x4d DW_TAG_subprogram - .quad .Lfoo_begin # DW_AT_low_pc - .long .Lfunc_end0-.Lfoo_begin # DW_AT_high_pc - .long .Lranges # DW_AT_name + .quad ranges # DW_AT_low_pc + .long .Lranges_end-ranges # DW_AT_high_pc + .asciz "ranges" # DW_AT_name .byte 5 # Abbrev [5] 0x61:0x15 DW_TAG_lexical_block .long .Ldebug_ranges0 # DW_AT_ranges .byte 0 # End Of Children Mark @@ -86,9 +76,9 @@ lookup_ranges: .section .debug_ranges,"",@progbits .Ldebug_ranges0: - .quad .Lblock1_begin-.Lfoo_begin - .quad .Lblock1_end-.Lfoo_begin - .quad .Lblock2_begin-.Lfoo_begin - .quad .Lblock2_end-.Lfoo_begin + .quad .Lblock1_begin-ranges + .quad .Lblock1_end-ranges + .quad .Lblock2_begin-ranges + .quad .Lblock2_end-ranges .quad 0 .quad 0 diff --git a/lldb/test/Shell/SymbolFile/DWARF/debug_rnglists.s b/lldb/test/Shell/SymbolFile/DWARF/debug_rnglists.s index 5d95b80e8da6a..1d718054a5877 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/debug_rnglists.s +++ b/lldb/test/Shell/SymbolFile/DWARF/debug_rnglists.s @@ -1,18 +1,22 @@ # REQUIRES: x86 # RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t -# RUN: %lldb %t -o "image lookup -v -s lookup_rnglists" -o exit | FileCheck %s +# RUN: %lldb %t -o "image lookup -v -s lookup_rnglists" \ +# RUN: -o "image lookup -v -s lookup_rnglists2" -o exit | FileCheck %s -# CHECK: Function: id = {0x7fffffff00000021}, name = "rnglists", range = [0x0000000000000000-0x0000000000000004) -# CHECK: Blocks: id = {0x7fffffff00000021}, range = [0x00000000-0x00000004) -# CHECK-NEXT: id = {0x7fffffff00000032}, ranges = [0x00000001-0x00000002)[0x00000003-0x00000004) +# CHECK-LABEL: image lookup -v -s lookup_rnglists +# CHECK: Function: id = {0x7fffffff00000030}, name = "rnglists", range = [0x0000000000000000-0x0000000000000004) +# CHECK: Blocks: id = {0x7fffffff00000030}, range = [0x00000000-0x00000004) +# CHECK-NEXT: id = {0x7fffffff00000046}, ranges = [0x00000001-0x00000002)[0x00000003-0x00000004) + +# CHECK-LABEL: image lookup -v -s lookup_rnglists2 +# CHECK: Function: id = {0x7fffffff0000007a}, name = "rnglists2", range = [0x0000000000000004-0x0000000000000007) +# CHECK: Blocks: id = {0x7fffffff0000007a}, range = [0x00000004-0x00000007) +# CHECK-NEXT: id = {0x7fffffff00000091}, range = [0x00000005-0x00000007) .text .p2align 12 - .globl rnglists - .type rnglists,@function -rnglists: # @rnglists -.Lfoo_begin: +rnglists: nop .Lblock1_begin: lookup_rnglists: @@ -22,21 +26,23 @@ lookup_rnglists: .Lblock2_begin: nop .Lblock2_end: -.Lfunc_end0: - .size rnglists, .Lfunc_end0-rnglists - # -- End function - .section .debug_str,"MS",@progbits,1 -.Lproducer: - .asciz "Hand-written DWARF" -.Lrnglists: - .asciz "rnglists" +.Lrnglists_end: + +rnglists2: + nop +.Lblock3_begin: +lookup_rnglists2: + nop + nop +.Lblock3_end: +.Lrnglists2_end: .section .debug_abbrev,"",@progbits .byte 1 # Abbreviation Code .byte 17 # DW_TAG_compile_unit .byte 1 # DW_CHILDREN_yes .byte 37 # DW_AT_producer - .byte 14 # DW_FORM_strp + .byte 8 # DW_FORM_string .byte 17 # DW_AT_low_pc .byte 1 # DW_FORM_addr .byte 18 # DW_AT_high_pc @@ -53,7 +59,7 @@ lookup_rnglists: .byte 18 # DW_AT_high_pc .byte 6 # DW_FORM_data4 .byte 3 # DW_AT_name - .byte 14 # DW_FORM_strp + .byte 8 # DW_FORM_string 
.byte 0 # EOM(1) .byte 0 # EOM(2) .byte 5 # Abbreviation Code @@ -74,20 +80,42 @@ lookup_rnglists: .byte 8 # Address Size (in bytes) .long .debug_abbrev # Offset Into Abbrev. Section .byte 1 # Abbrev [1] 0xc:0x5f DW_TAG_compile_unit - .long .Lproducer # DW_AT_producer - .quad .Lfoo_begin # DW_AT_low_pc - .long .Lfunc_end0-.Lfoo_begin # DW_AT_high_pc + .asciz "Hand-written DWARF" # DW_AT_producer + .quad rnglists # DW_AT_low_pc + .long .Lrnglists_end-rnglists # DW_AT_high_pc .long .Lrnglists_table_base0 # DW_AT_rnglists_base .byte 2 # Abbrev [2] 0x2b:0x37 DW_TAG_subprogram - .quad .Lfoo_begin # DW_AT_low_pc - .long .Lfunc_end0-.Lfoo_begin # DW_AT_high_pc - .long .Lrnglists # DW_AT_name + .quad rnglists # DW_AT_low_pc + .long .Lrnglists_end-rnglists # DW_AT_high_pc + .asciz "rnglists" # DW_AT_name .byte 5 # Abbrev [5] 0x52:0xf DW_TAG_lexical_block .byte 0 # DW_AT_ranges .byte 0 # End Of Children Mark .byte 0 # End Of Children Mark .Ldebug_info_end0: +.Lcu_begin1: + .long .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit +.Ldebug_info_start1: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] 0xc:0x5f DW_TAG_compile_unit + .asciz "Hand-written DWARF" # DW_AT_producer + .quad rnglists2 # DW_AT_low_pc + .long .Lrnglists2_end-rnglists2 # DW_AT_high_pc + .long .Lrnglists_table_base1 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] 0x2b:0x37 DW_TAG_subprogram + .quad rnglists2 # DW_AT_low_pc + .long .Lrnglists2_end-rnglists2 # DW_AT_high_pc + .asciz "rnglists2" # DW_AT_name + .byte 5 # Abbrev [5] 0x52:0xf DW_TAG_lexical_block + .byte 0 # DW_AT_ranges + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.Ldebug_info_end1: + .section .debug_rnglists,"",@progbits .long .Ldebug_rnglist_table_end0-.Ldebug_rnglist_table_start0 # Length .Ldebug_rnglist_table_start0: @@ -99,12 +127,25 @@ lookup_rnglists: .long .Ldebug_ranges0-.Lrnglists_table_base0 .Ldebug_ranges0: .byte 4 # DW_RLE_offset_pair - .uleb128 .Lblock1_begin-.Lfoo_begin # starting offset - .uleb128 .Lblock1_end-.Lfoo_begin # ending offset + .uleb128 .Lblock1_begin-rnglists # starting offset + .uleb128 .Lblock1_end-rnglists # ending offset .byte 4 # DW_RLE_offset_pair - .uleb128 .Lblock2_begin-.Lfoo_begin # starting offset - .uleb128 .Lblock2_end-.Lfoo_begin # ending offset + .uleb128 .Lblock2_begin-rnglists # starting offset + .uleb128 .Lblock2_end-rnglists # ending offset .byte 0 # DW_RLE_end_of_list .Ldebug_rnglist_table_end0: - .section .debug_macinfo,"",@progbits - .byte 0 # End Of Macro List Mark + + .long .Ldebug_rnglist_table_end1-.Ldebug_rnglist_table_start1 # Length +.Ldebug_rnglist_table_start1: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 1 # Offset entry count +.Lrnglists_table_base1: + .long .Ldebug_ranges1-.Lrnglists_table_base1 +.Ldebug_ranges1: + .byte 4 # DW_RLE_offset_pair + .uleb128 .Lblock3_begin-rnglists2 # starting offset + .uleb128 .Lblock3_end-rnglists2 # ending offset + .byte 0 # DW_RLE_end_of_list +.Ldebug_rnglist_table_end1: diff --git a/lldb/test/Shell/SymbolFile/DWARF/dwarf5-debug_line.s b/lldb/test/Shell/SymbolFile/DWARF/dwarf5-debug_line.s new file mode 100644 index 0000000000000..d15f31039bbd6 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/dwarf5-debug_line.s @@ -0,0 +1,129 @@ +# Test handling of DWARF5 line tables. In particular, test that we handle files +# which are present in the line table more than once. 
+ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -o %t -triple x86_64-pc-linux %s +# RUN: %lldb %t -o "source info -f file0.c" -o "source info -f file1.c" \ +# RUN: -o "breakpoint set -f file0.c -l 42" \ +# RUN: -o "breakpoint set -f file0.c -l 47" \ +# RUN: -o exit | FileCheck %s + +# CHECK-LABEL: source info -f file0.c +# CHECK: [0x0000000000000000-0x0000000000000001): /file0.c:42 +# CHECK-LABEL: source info -f file1.c +# CHECK: [0x0000000000000001-0x0000000000000002): /file1.c:47 +# CHECK-LABEL: breakpoint set -f file0.c -l 42 +# CHECK: Breakpoint 1: {{.*}}`foo, +# CHECK-LABEL: breakpoint set -f file0.c -l 47 +# CHECK: Breakpoint 2: {{.*}}`foo + 2, + + .text + .globl foo +foo: + nop + nop + nop +.Lfoo_end: + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 37 # DW_AT_producer + .byte 8 # DW_FORM_string + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 8 # DW_FORM_string + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] 0xc:0x23 DW_TAG_compile_unit + .asciz "Hand-written DWARF" # DW_AT_producer + .short 12 # DW_AT_language + .asciz "file0.c" # DW_AT_name + .long .Lline_table_begin # DW_AT_stmt_list + .asciz "/" # DW_AT_comp_dir + .quad foo # DW_AT_low_pc + .long .Lfoo_end-foo # DW_AT_high_pc +.Ldebug_info_end0: + + .section .debug_line,"",@progbits +.Lline_table_begin: + .long .Lline_end-.Lline_start +.Lline_start: + .short 5 # DWARF version number + .byte 8 # Address Size (in bytes) + .byte 0 # Segment Selector Size + .long .Lheader_end-.Lheader_start +.Lheader_start: + .byte 1 # Minimum Instruction Length + .byte 1 # Maximum Operations per Instruction + .byte 1 # Default is_stmt + .byte 0 # Line Base + .byte 0 # Line Range + .byte 5 # Opcode Base + .byte 0, 1, 1, 1 # Standard Opcode Lengths + + # Directory table format + .byte 1 # One element per directory entry + .byte 1 # DW_LNCT_path + .byte 0x08 # DW_FORM_string + + # Directory table entries + .byte 1 # 1 directory + .asciz "/" + + # File table format + .byte 2 # 2 elements per file entry + .byte 1 # DW_LNCT_path + .byte 0x08 # DW_FORM_string + .byte 2 # DW_LNCT_directory_index + .byte 0x0b # DW_FORM_data1 + + # File table entries + .byte 3 # 3 files + .asciz "file0.c" + .byte 0 + .asciz "file1.c" + .byte 0 + .asciz "file0.c" + .byte 0 +.Lheader_end: + + .byte 4, 0 # DW_LNS_set_file 0 + .byte 0, 9, 2 # DW_LNE_set_address + .quad foo + .byte 3, 41 # DW_LNS_advance_line 41 + .byte 1 # DW_LNS_copy + + .byte 4, 1 # DW_LNS_set_file 1 + .byte 2, 1 # DW_LNS_advance_pc 1 + .byte 3, 5 # DW_LNS_advance_line 5 + .byte 1 # DW_LNS_copy + + .byte 4, 2 # DW_LNS_set_file 2 + .byte 2, 1 # DW_LNS_advance_pc 1 + .byte 1 # DW_LNS_copy + + .byte 2, 1 # DW_LNS_advance_pc 1 + .byte 0, 1, 1 # DW_LNE_end_sequence +.Lline_end: diff --git a/lldb/test/Shell/SymbolFile/DWARF/win-i386-line-table.s b/lldb/test/Shell/SymbolFile/DWARF/win-i386-line-table.s new file mode 100644 index 0000000000000..2fa5ba5352b6b --- 
/dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/win-i386-line-table.s @@ -0,0 +1,55 @@ +# Test that lldb can read a line table for an architecture with a different +# address size than that of the host. + +# REQUIRES: lld, x86 + +# RUN: llvm-mc -triple i686-windows-gnu %s -filetype=obj > %t.o +# RUN: lld-link %t.o -out:%t.exe -debug:dwarf -entry:entry -subsystem:console -lldmingw +# RUN: %lldb %t.exe -o "image dump line-table -v win-i386-line-table.c" -b | FileCheck %s + +# CHECK: Line table for win-i386-line-table.c in `win-i386-line-table.s.tmp.exe +# CHECK: 0x00401000: win-i386-line-table.c:2:1 +# CHECK: 0x00401001: win-i386-line-table.c:2:1 + + .text + .file "win-i386-line-table.c" + .globl _entry # -- Begin function entry +_entry: # @entry + .file 1 "" "win-i386-line-table.c" + .loc 1 1 0 # win-i386-line-table.c:1:0 + .cfi_sections .debug_frame + .cfi_startproc + .loc 1 2 1 prologue_end # win-i386-line-table.c:2:1 + retl + .cfi_endproc + # -- End function + .section .debug_str,"dr" +Linfo_string1: + .asciz "win-i386-line-table.c" + .section .debug_abbrev,"dr" +Lsection_abbrev: + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"dr" +Lsection_info: +Lcu_begin0: + .long Ldebug_info_end0-Ldebug_info_start0 # Length of Unit +Ldebug_info_start0: + .short 4 # DWARF version number + .secrel32 Lsection_abbrev # Offset Into Abbrev. Section + .byte 4 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x2d DW_TAG_compile_unit + .secrel32 Linfo_string1 # DW_AT_name + .secrel32 Lline_table_start0 # DW_AT_stmt_list + .byte 0 # End Of Children Mark +Ldebug_info_end0: + .section .debug_line,"dr" +Lline_table_start0: diff --git a/lldb/test/Shell/lit.cfg.py b/lldb/test/Shell/lit.cfg.py index 84c5b730dd31e..68891e600169e 100644 --- a/lldb/test/Shell/lit.cfg.py +++ b/lldb/test/Shell/lit.cfg.py @@ -38,6 +38,10 @@ # test_exec_root: The root path where tests should be run.
config.test_exec_root = os.path.join(config.lldb_obj_root, 'test') +# Propagate LLDB_CAPTURE_REPRODUCER +if 'LLDB_CAPTURE_REPRODUCER' in os.environ: + config.environment['LLDB_CAPTURE_REPRODUCER'] = os.environ[ + 'LLDB_CAPTURE_REPRODUCER'] llvm_config.use_default_substitutions() toolchain.use_lldb_substitutions(config) diff --git a/lldb/tools/debugserver/source/DNB.cpp b/lldb/tools/debugserver/source/DNB.cpp index c9f2e34e2798c..8d9c691f9d337 100644 --- a/lldb/tools/debugserver/source/DNB.cpp +++ b/lldb/tools/debugserver/source/DNB.cpp @@ -1722,6 +1722,8 @@ nub_bool_t DNBSetArchitecture(const char *arch) { else if (strstr(arch, "arm64_32") == arch || strstr(arch, "aarch64_32") == arch) return DNBArchProtocol::SetArchitecture(CPU_TYPE_ARM64_32); + else if (strstr(arch, "arm64e") == arch) + return DNBArchProtocol::SetArchitecture(CPU_TYPE_ARM64); else if (strstr(arch, "arm64") == arch || strstr(arch, "armv8") == arch || strstr(arch, "aarch64") == arch) return DNBArchProtocol::SetArchitecture(CPU_TYPE_ARM64); diff --git a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp index 1bf14d97056ce..e8c40910567ca 100644 --- a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp +++ b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp @@ -26,6 +26,10 @@ #include #include +#if __has_feature(ptrauth_calls) +#include +#endif + // Break only in privileged or user mode // (PAC bits in the DBGWVRn_EL1 watchpoint control register) #define S_USER ((uint32_t)(2u << 1)) @@ -93,7 +97,11 @@ uint32_t DNBArchMachARM64::GetCPUType() { return CPU_TYPE_ARM64; } uint64_t DNBArchMachARM64::GetPC(uint64_t failValue) { // Get program counter if (GetGPRState(false) == KERN_SUCCESS) +#if defined(__LP64__) + return arm_thread_state64_get_pc(m_state.context.gpr); +#else return m_state.context.gpr.__pc; +#endif return failValue; } @@ -101,7 +109,17 @@ kern_return_t DNBArchMachARM64::SetPC(uint64_t value) { // Get program counter kern_return_t err = GetGPRState(false); if (err == KERN_SUCCESS) { +#if defined(__LP64__) +#if __has_feature(ptrauth_calls) + // The incoming value could be garbage. Strip it to avoid + // trapping when it gets resigned in the thread state. 
+ value = (uint64_t) ptrauth_strip((void*) value, ptrauth_key_function_pointer); + value = (uint64_t) ptrauth_sign_unauthenticated((void*) value, ptrauth_key_function_pointer, 0); +#endif + arm_thread_state64_set_pc_fptr (m_state.context.gpr, (void*) value); +#else m_state.context.gpr.__pc = value; +#endif err = SetGPRState(); } return err == KERN_SUCCESS; } @@ -110,7 +128,11 @@ kern_return_t DNBArchMachARM64::SetPC(uint64_t value) { uint64_t DNBArchMachARM64::GetSP(uint64_t failValue) { // Get stack pointer if (GetGPRState(false) == KERN_SUCCESS) +#if defined(__LP64__) + return arm_thread_state64_get_sp(m_state.context.gpr); +#else return m_state.context.gpr.__sp; +#endif return failValue; } @@ -167,8 +189,15 @@ kern_return_t DNBArchMachARM64::GetGPRState(bool force) { x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[0], x[11], x[12], x[13], x[14], x[15], x[16], x[17], x[18], x[19], x[20], x[21], x[22], x[23], x[24], x[25], x[26], x[27], x[28], +#if defined(__LP64__) + (uint64_t) arm_thread_state64_get_fp (m_state.context.gpr), + (uint64_t) arm_thread_state64_get_lr (m_state.context.gpr), + (uint64_t) arm_thread_state64_get_sp (m_state.context.gpr), + (uint64_t) arm_thread_state64_get_pc (m_state.context.gpr), +#else m_state.context.gpr.__fp, m_state.context.gpr.__lr, m_state.context.gpr.__sp, m_state.context.gpr.__pc, +#endif m_state.context.gpr.__cpsr); } m_state.SetError(set, Read, kret); @@ -564,12 +593,20 @@ kern_return_t DNBArchMachARM64::EnableHardwareSingleStep(bool enable) { if (enable) { DNBLogThreadedIf(LOG_STEP, "%s: Setting MDSCR_EL1 Single Step bit at pc 0x%llx", +#if defined(__LP64__) + __FUNCTION__, (uint64_t)arm_thread_state64_get_pc (m_state.context.gpr)); +#else __FUNCTION__, (uint64_t)m_state.context.gpr.__pc); +#endif m_state.dbg.__mdscr_el1 |= SS_ENABLE; } else { DNBLogThreadedIf(LOG_STEP, "%s: Clearing MDSCR_EL1 Single Step bit at pc 0x%llx", +#if defined(__LP64__) + __FUNCTION__, (uint64_t)arm_thread_state64_get_pc (m_state.context.gpr)); +#else __FUNCTION__, (uint64_t)m_state.context.gpr.__pc); +#endif m_state.dbg.__mdscr_el1 &= ~(SS_ENABLE); } @@ -1409,10 +1446,28 @@ const DNBRegisterInfo DNBArchMachARM64::g_gpr_registers[] = { DEFINE_GPR_IDX(26, x26, NULL, INVALID_NUB_REGNUM), DEFINE_GPR_IDX(27, x27, NULL, INVALID_NUB_REGNUM), DEFINE_GPR_IDX(28, x28, NULL, INVALID_NUB_REGNUM), - DEFINE_GPR_NAME(fp, "x29", GENERIC_REGNUM_FP), - DEFINE_GPR_NAME(lr, "x30", GENERIC_REGNUM_RA), - DEFINE_GPR_NAME(sp, "xsp", GENERIC_REGNUM_SP), - DEFINE_GPR_NAME(pc, NULL, GENERIC_REGNUM_PC), + // For the G/g packet we want to show where the offset into the regctx + // is for fp/lr/sp/pc, but we cannot directly access them on arm64e + // devices (and therefore can't offsetof() them) - add the offset based + // on the last accessible register by hand for advertising the location + // in the regctx to lldb. We'll go through the accessor functions when + // we read/write them here.
+ { + e_regSetGPR, gpr_fp, "fp", "x29", Uint, Hex, 8, GPR_OFFSET_IDX(28) + 8, + dwarf_fp, dwarf_fp, GENERIC_REGNUM_FP, debugserver_gpr_fp, NULL, NULL + }, + { + e_regSetGPR, gpr_lr, "lr", "x30", Uint, Hex, 8, GPR_OFFSET_IDX(28) + 16, + dwarf_lr, dwarf_lr, GENERIC_REGNUM_RA, debugserver_gpr_lr, NULL, NULL + }, + { + e_regSetGPR, gpr_sp, "sp", "xsp", Uint, Hex, 8, GPR_OFFSET_IDX(28) + 24, + dwarf_sp, dwarf_sp, GENERIC_REGNUM_SP, debugserver_gpr_sp, NULL, NULL + }, + { + e_regSetGPR, gpr_pc, "pc", NULL, Uint, Hex, 8, GPR_OFFSET_IDX(28) + 32, + dwarf_pc, dwarf_pc, GENERIC_REGNUM_PC, debugserver_gpr_pc, NULL, NULL + }, // in armv7 we specify that writing to the CPSR should invalidate r8-12, sp, // lr. @@ -1769,7 +1824,20 @@ bool DNBArchMachARM64::GetRegisterValue(uint32_t set, uint32_t reg, switch (set) { case e_regSetGPR: if (reg <= gpr_pc) { +#if defined(__LP64__) + if (reg == gpr_pc) + value->value.uint64 = arm_thread_state64_get_pc (m_state.context.gpr); + else if (reg == gpr_lr) + value->value.uint64 = arm_thread_state64_get_lr (m_state.context.gpr); + else if (reg == gpr_sp) + value->value.uint64 = arm_thread_state64_get_sp (m_state.context.gpr); + else if (reg == gpr_fp) + value->value.uint64 = arm_thread_state64_get_fp (m_state.context.gpr); + else + value->value.uint64 = m_state.context.gpr.__x[reg]; +#else value->value.uint64 = m_state.context.gpr.__x[reg]; +#endif return true; } else if (reg == gpr_cpsr) { value->value.uint32 = m_state.context.gpr.__cpsr; @@ -1859,7 +1927,27 @@ bool DNBArchMachARM64::SetRegisterValue(uint32_t set, uint32_t reg, switch (set) { case e_regSetGPR: if (reg <= gpr_pc) { +#if defined(__LP64__) + uint64_t signed_value = value->value.uint64; +#if __has_feature(ptrauth_calls) + // The incoming value could be garbage. Strip it to avoid + // trapping when it gets resigned in the thread state. 
+ signed_value = (uint64_t) ptrauth_strip((void*) signed_value, ptrauth_key_function_pointer); + signed_value = (uint64_t) ptrauth_sign_unauthenticated((void*) signed_value, ptrauth_key_function_pointer, 0); +#endif + if (reg == gpr_pc) + arm_thread_state64_set_pc_fptr (m_state.context.gpr, (void*) signed_value); + else if (reg == gpr_lr) + arm_thread_state64_set_lr_fptr (m_state.context.gpr, (void*) signed_value); + else if (reg == gpr_sp) + arm_thread_state64_set_sp (m_state.context.gpr, value->value.uint64); + else if (reg == gpr_fp) + arm_thread_state64_set_fp (m_state.context.gpr, value->value.uint64); + else + m_state.context.gpr.__x[reg] = value->value.uint64; +#else m_state.context.gpr.__x[reg] = value->value.uint64; +#endif success = true; } else if (reg == gpr_cpsr) { m_state.context.gpr.__cpsr = value->value.uint32; diff --git a/lldb/tools/debugserver/source/RNBRemote.cpp b/lldb/tools/debugserver/source/RNBRemote.cpp index 61bdf0d8dac64..64e3bc49abc8c 100644 --- a/lldb/tools/debugserver/source/RNBRemote.cpp +++ b/lldb/tools/debugserver/source/RNBRemote.cpp @@ -4643,6 +4643,24 @@ static bool GetHostCPUType(uint32_t &cputype, uint32_t &cpusubtype, return g_host_cputype != 0; } +static bool GetAddressingBits(uint32_t &addressing_bits) { + static uint32_t g_addressing_bits = 0; + static bool g_tried_addressing_bits_syscall = false; + if (g_tried_addressing_bits_syscall == false) { + size_t len = sizeof (uint32_t); + if (::sysctlbyname("machdep.virtual_address_size", + &g_addressing_bits, &len, NULL, 0) != 0) { + g_addressing_bits = 0; + } + } + g_tried_addressing_bits_syscall = true; + addressing_bits = g_addressing_bits; + if (addressing_bits > 0) + return true; + else + return false; +} + rnb_err_t RNBRemote::HandlePacket_qHostInfo(const char *p) { std::ostringstream strm; @@ -4655,6 +4673,11 @@ rnb_err_t RNBRemote::HandlePacket_qHostInfo(const char *p) { strm << "cpusubtype:" << std::dec << cpusubtype << ';'; } + uint32_t addressing_bits = 0; + if (GetAddressingBits(addressing_bits)) { + strm << "addressing_bits:" << std::dec << addressing_bits << ';'; + } + // The OS in the triple should be "ios" or "macosx" which doesn't match our // "Darwin" which gets returned from "kern.ostype", so we need to hardcode // this for now. diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index fe4a17762f8bc..73874389aa1bb 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -797,7 +797,9 @@ static void printHelp(LLDBOptTable &table, llvm::StringRef tool_name) { llvm::Optional InitializeReproducer(opt::InputArgList &input_args) { if (auto *replay_path = input_args.getLastArg(OPT_replay)) { - if (const char *error = SBReproducer::Replay(replay_path->getValue())) { + const bool skip_version_check = input_args.hasArg(OPT_skip_version_check); + if (const char *error = + SBReproducer::Replay(replay_path->getValue(), skip_version_check)) { WithColor::error() << "reproducer replay failed: " << error << '\n'; return 1; } @@ -854,7 +856,7 @@ int main(int argc, char const *argv[]) { } // Register the reproducer signal handler. 
- llvm::sys::AddSignalHandler(reproducer_handler, (void *)(argv[0])); + llvm::sys::AddSignalHandler(reproducer_handler, const_cast(argv[0])); SBError error = SBDebugger::InitializeWithErrorHandling(); if (error.Fail()) { diff --git a/lldb/tools/driver/Options.td b/lldb/tools/driver/Options.td index 485c0d44bc848..c237f568f64c4 100644 --- a/lldb/tools/driver/Options.td +++ b/lldb/tools/driver/Options.td @@ -232,5 +232,7 @@ def capture_path: Separate<["--", "-"], "capture-path">, def replay: Separate<["--", "-"], "replay">, MetaVarName<"">, HelpText<"Tells the debugger to replay a reproducer from .">; +def skip_version_check: F<"reproducer-skip-version-check">, + HelpText<"Skip the reproducer version check.">; def REM : R<["--"], "">; diff --git a/lldb/tools/lldb-test/lldb-test.cpp b/lldb/tools/lldb-test/lldb-test.cpp index 66c8536301d52..12e4a56059796 100644 --- a/lldb/tools/lldb-test/lldb-test.cpp +++ b/lldb/tools/lldb-test/lldb-test.cpp @@ -549,7 +549,8 @@ Error opts::symbols::findVariables(lldb_private::Module &Module) { CompUnitSP CU; for (size_t Ind = 0; !CU && Ind < Module.GetNumCompileUnits(); ++Ind) { CompUnitSP Candidate = Module.GetCompileUnitAtIndex(Ind); - if (!Candidate || Candidate->GetFilename().GetStringRef() != File) + if (!Candidate || + Candidate->GetPrimaryFile().GetFilename().GetStringRef() != File) continue; if (CU) return make_string_error("Multiple compile units for file `{0}` found.", @@ -653,7 +654,8 @@ Error opts::symbols::verify(lldb_private::Module &Module) { if (!comp_unit) return make_string_error("Connot parse compile unit {0}.", i); - outs() << "Processing '" << comp_unit->GetFilename().AsCString() + outs() << "Processing '" + << comp_unit->GetPrimaryFile().GetFilename().AsCString() << "' compile unit.\n"; LineTable *lt = comp_unit->GetLineTable(); diff --git a/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp b/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp index 150bef1590f4a..deb6c7d54ea9f 100644 --- a/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp +++ b/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp @@ -191,6 +191,8 @@ TEST(CPlusPlusLanguage, FindAlternateFunctionManglings) { EXPECT_THAT(FindAlternate("_ZN1A1fEx"), Contains("_ZN1A1fEl")); EXPECT_THAT(FindAlternate("_ZN1A1fEy"), Contains("_ZN1A1fEm")); EXPECT_THAT(FindAlternate("_ZN1A1fEai"), Contains("_ZN1A1fEci")); + EXPECT_THAT(FindAlternate("_ZN1AC1Ev"), Contains("_ZN1AC2Ev")); + EXPECT_THAT(FindAlternate("_ZN1AD1Ev"), Contains("_ZN1AD2Ev")); EXPECT_THAT(FindAlternate("_bogus"), IsEmpty()); } diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp index 12ffdfe79ec32..8bc510bd989aa 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp @@ -62,6 +62,14 @@ extern "C" void init_lldb(void) {} #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wreturn-type-c-linkage" +// Disable warning C4190: 'LLDBSwigPythonBreakpointCallbackFunction' has +// C-linkage specified, but returns UDT 'llvm::Expected' which is +// incompatible with C +#if _MSC_VER +#pragma warning (push) +#pragma warning (disable : 4190) +#endif + extern "C" llvm::Expected LLDBSwigPythonBreakpointCallbackFunction( const char *python_function_name, const char *session_dictionary_name, const lldb::StackFrameSP &sb_frame, @@ -70,6 +78,10 @@ extern "C" llvm::Expected LLDBSwigPythonBreakpointCallbackFunction( return false; } 
+#if _MSC_VER +#pragma warning (pop) +#endif + #pragma clang diagnostic pop extern "C" bool LLDBSwigPythonWatchpointCallbackFunction( diff --git a/lldb/unittests/Symbol/TestClangASTContext.cpp b/lldb/unittests/Symbol/TestClangASTContext.cpp index 44a824636cf73..8fb24acc7a6a1 100644 --- a/lldb/unittests/Symbol/TestClangASTContext.cpp +++ b/lldb/unittests/Symbol/TestClangASTContext.cpp @@ -169,10 +169,12 @@ TEST_F(TestClangASTContext, TestGetBasicTypeFromName) { EXPECT_EQ(GetBasicQualType(eBasicTypeNullPtr), GetBasicQualType("nullptr")); } -void VerifyEncodingAndBitSize(clang::ASTContext *context, +void VerifyEncodingAndBitSize(ClangASTContext &clang_context, lldb::Encoding encoding, unsigned int bit_size) { - CompilerType type = ClangASTContext::GetBuiltinTypeForEncodingAndBitSize( - context, encoding, bit_size); + clang::ASTContext *context = clang_context.getASTContext(); + + CompilerType type = + clang_context.GetBuiltinTypeForEncodingAndBitSize(encoding, bit_size); EXPECT_TRUE(type.IsValid()); QualType qtype = ClangUtil::GetQualType(type); @@ -206,8 +208,6 @@ void VerifyEncodingAndBitSize(clang::ASTContext *context, } TEST_F(TestClangASTContext, TestBuiltinTypeForEncodingAndBitSize) { - clang::ASTContext *context = m_ast->getASTContext(); - // Make sure we can get types of every possible size in every possible // encoding. // We can't make any guarantee about which specific type we get, because the @@ -215,20 +215,20 @@ TEST_F(TestClangASTContext, TestBuiltinTypeForEncodingAndBitSize) { // isn't that specific. We only need to make sure the compiler hands us some // type that // is both a builtin type and matches the requested bit size. - VerifyEncodingAndBitSize(context, eEncodingSint, 8); - VerifyEncodingAndBitSize(context, eEncodingSint, 16); - VerifyEncodingAndBitSize(context, eEncodingSint, 32); - VerifyEncodingAndBitSize(context, eEncodingSint, 64); - VerifyEncodingAndBitSize(context, eEncodingSint, 128); - - VerifyEncodingAndBitSize(context, eEncodingUint, 8); - VerifyEncodingAndBitSize(context, eEncodingUint, 16); - VerifyEncodingAndBitSize(context, eEncodingUint, 32); - VerifyEncodingAndBitSize(context, eEncodingUint, 64); - VerifyEncodingAndBitSize(context, eEncodingUint, 128); - - VerifyEncodingAndBitSize(context, eEncodingIEEE754, 32); - VerifyEncodingAndBitSize(context, eEncodingIEEE754, 64); + VerifyEncodingAndBitSize(*m_ast, eEncodingSint, 8); + VerifyEncodingAndBitSize(*m_ast, eEncodingSint, 16); + VerifyEncodingAndBitSize(*m_ast, eEncodingSint, 32); + VerifyEncodingAndBitSize(*m_ast, eEncodingSint, 64); + VerifyEncodingAndBitSize(*m_ast, eEncodingSint, 128); + + VerifyEncodingAndBitSize(*m_ast, eEncodingUint, 8); + VerifyEncodingAndBitSize(*m_ast, eEncodingUint, 16); + VerifyEncodingAndBitSize(*m_ast, eEncodingUint, 32); + VerifyEncodingAndBitSize(*m_ast, eEncodingUint, 64); + VerifyEncodingAndBitSize(*m_ast, eEncodingUint, 128); + + VerifyEncodingAndBitSize(*m_ast, eEncodingIEEE754, 32); + VerifyEncodingAndBitSize(*m_ast, eEncodingIEEE754, 64); } TEST_F(TestClangASTContext, TestIsClangType) { diff --git a/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp b/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp index 0470394d42555..e8a8690c1ff1f 100644 --- a/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp +++ b/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp @@ -109,7 +109,7 @@ class SymbolFilePDBTests : public testing::Test { const FileSpec &spec) const { for (size_t i = 0; i < sc_list.GetSize(); ++i) { const SymbolContext &sc = sc_list[i]; - if 
(FileSpecMatchesAsBaseOrFull(*sc.comp_unit, spec)) + if (FileSpecMatchesAsBaseOrFull(sc.comp_unit->GetPrimaryFile(), spec)) return true; } return false; diff --git a/lldb/unittests/Utility/ArchSpecTest.cpp b/lldb/unittests/Utility/ArchSpecTest.cpp index 0186ff05ead8a..9115808c12587 100644 --- a/lldb/unittests/Utility/ArchSpecTest.cpp +++ b/lldb/unittests/Utility/ArchSpecTest.cpp @@ -216,6 +216,41 @@ TEST(ArchSpecTest, MergeFrom) { EXPECT_EQ(llvm::Triple::EnvironmentType::UnknownEnvironment, A.GetTriple().getEnvironment()); } + { + ArchSpec A("arm--linux-eabihf"); + ArchSpec B("armv8l--linux-gnueabihf"); + + EXPECT_TRUE(A.IsValid()); + EXPECT_TRUE(B.IsValid()); + + EXPECT_EQ(llvm::Triple::ArchType::arm, A.GetTriple().getArch()); + EXPECT_EQ(llvm::Triple::ArchType::arm, B.GetTriple().getArch()); + + EXPECT_EQ(ArchSpec::eCore_arm_generic, A.GetCore()); + EXPECT_EQ(ArchSpec::eCore_arm_armv8l, B.GetCore()); + + EXPECT_EQ(llvm::Triple::VendorType::UnknownVendor, + A.GetTriple().getVendor()); + EXPECT_EQ(llvm::Triple::VendorType::UnknownVendor, + B.GetTriple().getVendor()); + + EXPECT_EQ(llvm::Triple::OSType::Linux, A.GetTriple().getOS()); + EXPECT_EQ(llvm::Triple::OSType::Linux, B.GetTriple().getOS()); + + EXPECT_EQ(llvm::Triple::EnvironmentType::EABIHF, + A.GetTriple().getEnvironment()); + EXPECT_EQ(llvm::Triple::EnvironmentType::GNUEABIHF, + B.GetTriple().getEnvironment()); + + A.MergeFrom(B); + EXPECT_EQ(llvm::Triple::ArchType::arm, A.GetTriple().getArch()); + EXPECT_EQ(ArchSpec::eCore_arm_armv8l, A.GetCore()); + EXPECT_EQ(llvm::Triple::VendorType::UnknownVendor, + A.GetTriple().getVendor()); + EXPECT_EQ(llvm::Triple::OSType::Linux, A.GetTriple().getOS()); + EXPECT_EQ(llvm::Triple::EnvironmentType::EABIHF, + A.GetTriple().getEnvironment()); + } } TEST(ArchSpecTest, MergeFromMachOUnknown) { diff --git a/lldb/unittests/Utility/FileSpecTest.cpp b/lldb/unittests/Utility/FileSpecTest.cpp index 0f5b1652d2989..d5f1091d5d469 100644 --- a/lldb/unittests/Utility/FileSpecTest.cpp +++ b/lldb/unittests/Utility/FileSpecTest.cpp @@ -12,6 +12,14 @@ using namespace lldb_private; +static FileSpec PosixSpec(llvm::StringRef path) { + return FileSpec(path, FileSpec::Style::posix); +} + +static FileSpec WindowsSpec(llvm::StringRef path) { + return FileSpec(path, FileSpec::Style::windows); +} + TEST(FileSpecTest, FileAndDirectoryComponents) { FileSpec fs_posix("/foo/bar", FileSpec::Style::posix); EXPECT_STREQ("/foo/bar", fs_posix.GetCString()); @@ -106,8 +114,7 @@ TEST(FileSpecTest, AppendPathComponent) { } TEST(FileSpecTest, CopyByAppendingPathComponent) { - FileSpec fs = FileSpec("/foo", FileSpec::Style::posix) - .CopyByAppendingPathComponent("bar"); + FileSpec fs = PosixSpec("/foo").CopyByAppendingPathComponent("bar"); EXPECT_STREQ("/foo/bar", fs.GetCString()); EXPECT_STREQ("/foo", fs.GetDirectory().GetCString()); EXPECT_STREQ("bar", fs.GetFilename().GetCString()); @@ -136,9 +143,7 @@ TEST(FileSpecTest, PrependPathComponent) { } TEST(FileSpecTest, EqualSeparator) { - FileSpec backward("C:\\foo\\bar", FileSpec::Style::windows); - FileSpec forward("C:/foo/bar", FileSpec::Style::windows); - EXPECT_EQ(forward, backward); + EXPECT_EQ(WindowsSpec("C:\\foo\\bar"), WindowsSpec("C:/foo/bar")); } TEST(FileSpecTest, EqualDotsWindows) { @@ -153,9 +158,8 @@ TEST(FileSpecTest, EqualDotsWindows) { }; for (const auto &test : tests) { - FileSpec one(test.first, FileSpec::Style::windows); - FileSpec two(test.second, FileSpec::Style::windows); - EXPECT_EQ(one, two); + SCOPED_TRACE(llvm::Twine(test.first) + " <=> " + test.second); 
+ EXPECT_EQ(WindowsSpec(test.first), WindowsSpec(test.second)); } } @@ -169,9 +173,8 @@ TEST(FileSpecTest, EqualDotsPosix) { }; for (const auto &test : tests) { - FileSpec one(test.first, FileSpec::Style::posix); - FileSpec two(test.second, FileSpec::Style::posix); - EXPECT_EQ(one, two); + SCOPED_TRACE(llvm::Twine(test.first) + " <=> " + test.second); + EXPECT_EQ(PosixSpec(test.first), PosixSpec(test.second)); } } @@ -183,9 +186,8 @@ TEST(FileSpecTest, EqualDotsPosixRoot) { }; for (const auto &test : tests) { - FileSpec one(test.first, FileSpec::Style::posix); - FileSpec two(test.second, FileSpec::Style::posix); - EXPECT_EQ(one, two); + SCOPED_TRACE(llvm::Twine(test.first) + " <=> " + test.second); + EXPECT_EQ(PosixSpec(test.first), PosixSpec(test.second)); } } @@ -200,7 +202,7 @@ TEST(FileSpecTest, GuessPathStyle) { EXPECT_EQ(llvm::None, FileSpec::GuessPathStyle("foo/bar.txt")); } -TEST(FileSpecTest, GetNormalizedPath) { +TEST(FileSpecTest, GetPath) { std::pair posix_tests[] = { {"/foo/.././bar", "/bar"}, {"/foo/./../bar", "/bar"}, @@ -230,8 +232,7 @@ TEST(FileSpecTest, GetNormalizedPath) { }; for (auto test : posix_tests) { SCOPED_TRACE(llvm::Twine("test.first = ") + test.first); - EXPECT_EQ(test.second, - FileSpec(test.first, FileSpec::Style::posix).GetPath()); + EXPECT_EQ(test.second, PosixSpec(test.first).GetPath()); } std::pair windows_tests[] = { @@ -262,9 +263,8 @@ TEST(FileSpecTest, GetNormalizedPath) { {R"(..\..\foo)", R"(..\..\foo)"}, }; for (auto test : windows_tests) { - EXPECT_EQ(test.second, - FileSpec(test.first, FileSpec::Style::windows).GetPath()) - << "Original path: " << test.first; + SCOPED_TRACE(llvm::Twine("test.first = ") + test.first); + EXPECT_EQ(test.second, WindowsSpec(test.first).GetPath()); } } @@ -315,8 +315,8 @@ TEST(FileSpecTest, IsRelative) { "/foo/../.", }; for (const auto &path: not_relative) { - FileSpec spec(path, FileSpec::Style::posix); - EXPECT_FALSE(spec.IsRelative()); + SCOPED_TRACE(path); + EXPECT_FALSE(PosixSpec(path).IsRelative()); } llvm::StringRef is_relative[] = { ".", @@ -333,8 +333,8 @@ TEST(FileSpecTest, IsRelative) { "./foo/bar.c" }; for (const auto &path: is_relative) { - FileSpec spec(path, FileSpec::Style::posix); - EXPECT_TRUE(spec.IsRelative()); + SCOPED_TRACE(path); + EXPECT_TRUE(PosixSpec(path).IsRelative()); } } @@ -379,3 +379,44 @@ TEST(FileSpecTest, RemoveLastPathComponent) { EXPECT_FALSE(fs_windows.RemoveLastPathComponent()); EXPECT_STREQ("C:", fs_windows.GetCString()); } + +TEST(FileSpecTest, Equal) { + auto Eq = [](const char *a, const char *b, bool full) { + return FileSpec::Equal(PosixSpec(a), PosixSpec(b), full); + }; + EXPECT_TRUE(Eq("/foo/bar", "/foo/bar", true)); + EXPECT_TRUE(Eq("/foo/bar", "/foo/bar", false)); + + EXPECT_FALSE(Eq("/foo/bar", "/foo/baz", true)); + EXPECT_FALSE(Eq("/foo/bar", "/foo/baz", false)); + + EXPECT_FALSE(Eq("/bar/foo", "/baz/foo", true)); + EXPECT_FALSE(Eq("/bar/foo", "/baz/foo", false)); + + EXPECT_FALSE(Eq("/bar/foo", "foo", true)); + EXPECT_TRUE(Eq("/bar/foo", "foo", false)); + + EXPECT_FALSE(Eq("foo", "/bar/foo", true)); + EXPECT_TRUE(Eq("foo", "/bar/foo", false)); +} + +TEST(FileSpecTest, Match) { + auto Match = [](const char *pattern, const char *file) { + return FileSpec::Match(PosixSpec(pattern), PosixSpec(file)); + }; + EXPECT_TRUE(Match("/foo/bar", "/foo/bar")); + EXPECT_FALSE(Match("/foo/bar", "/oof/bar")); + EXPECT_FALSE(Match("/foo/bar", "/foo/baz")); + EXPECT_FALSE(Match("/foo/bar", "bar")); + EXPECT_FALSE(Match("/foo/bar", "")); + + EXPECT_TRUE(Match("bar", "/foo/bar")); + 
EXPECT_FALSE(Match("bar", "/foo/baz")); + EXPECT_TRUE(Match("bar", "bar")); + EXPECT_FALSE(Match("bar", "baz")); + EXPECT_FALSE(Match("bar", "")); + + EXPECT_TRUE(Match("", "/foo/bar")); + EXPECT_TRUE(Match("", "")); + +} diff --git a/lldb/unittests/Utility/StreamTest.cpp b/lldb/unittests/Utility/StreamTest.cpp index 2e2bcb344fcdf..6e42ac2d11f0a 100644 --- a/lldb/unittests/Utility/StreamTest.cpp +++ b/lldb/unittests/Utility/StreamTest.cpp @@ -36,6 +36,98 @@ struct BinaryStreamTest : StreamTest { }; } +TEST_F(StreamTest, AddressPrefix) { + s.Address(0x1, 1, "foo"); + EXPECT_EQ("foo0x01", TakeValue()); +} + +TEST_F(StreamTest, AddressEmptyPrefix) { + s.Address(0x1, 1, nullptr); + EXPECT_EQ("0x01", TakeValue()); + s.Address(0x1, 1, ""); + EXPECT_EQ("0x01", TakeValue()); +} + +TEST_F(StreamTest, AddressSuffix) { + s.Address(0x1, 1, nullptr, "foo"); + EXPECT_EQ("0x01foo", TakeValue()); +} + +TEST_F(StreamTest, AddressNoSuffix) { + s.Address(0x1, 1, nullptr, nullptr); + EXPECT_EQ("0x01", TakeValue()); + s.Address(0x1, 1, nullptr, ""); + EXPECT_EQ("0x01", TakeValue()); +} + +TEST_F(StreamTest, AddressPrefixAndSuffix) { + s.Address(0x1, 1, "foo", "bar"); + EXPECT_EQ("foo0x01bar", TakeValue()); +} + +TEST_F(StreamTest, AddressSize) { + s.Address(0x0, 0); + EXPECT_EQ("0x0", TakeValue()); + s.Address(0x1, 0); + EXPECT_EQ("0x1", TakeValue()); + + s.Address(0x1, 1); + EXPECT_EQ("0x01", TakeValue()); + s.Address(0xf1, 1); + EXPECT_EQ("0xf1", TakeValue()); + s.Address(0xff, 1); + EXPECT_EQ("0xff", TakeValue()); + s.Address(0x100, 1); + EXPECT_EQ("0x100", TakeValue()); + + s.Address(0xf00, 4); + EXPECT_EQ("0x00000f00", TakeValue()); + s.Address(0x100, 8); + EXPECT_EQ("0x0000000000000100", TakeValue()); + s.Address(0x100, 10); + EXPECT_EQ("0x00000000000000000100", TakeValue()); + s.Address(0x1234, 10); + EXPECT_EQ("0x00000000000000001234", TakeValue()); +} + +TEST_F(StreamTest, AddressRange) { + s.AddressRange(0x100, 0x101, 2); + EXPECT_EQ("[0x0100-0x0101)", TakeValue()); +} + +TEST_F(StreamTest, AddressRangeEmptyRange) { + s.AddressRange(0x100, 0x100, 2); + EXPECT_EQ("[0x0100-0x0100)", TakeValue()); + s.AddressRange(0x0, 0x0, 2); + EXPECT_EQ("[0x0000-0x0000)", TakeValue()); +} + +TEST_F(StreamTest, AddressRangeInvalidRange) { + s.AddressRange(0x100, 0x0FF, 2); + EXPECT_EQ("[0x0100-0x00ff)", TakeValue()); + s.AddressRange(0x100, 0x0, 2); + EXPECT_EQ("[0x0100-0x0000)", TakeValue()); +} + +TEST_F(StreamTest, AddressRangeSize) { + s.AddressRange(0x100, 0x101, 0); + EXPECT_EQ("[0x100-0x101)", TakeValue()); + s.AddressRange(0x100, 0x101, 2); + EXPECT_EQ("[0x0100-0x0101)", TakeValue()); + s.AddressRange(0x100, 0x101, 4); + EXPECT_EQ("[0x00000100-0x00000101)", TakeValue()); + + s.AddressRange(0x100, 0x101, 4); + EXPECT_EQ("[0x00000100-0x00000101)", TakeValue()); + s.AddressRange(0x1, 0x101, 4); + EXPECT_EQ("[0x00000001-0x00000101)", TakeValue()); + s.AddressRange(0x101, 0x1, 4); + EXPECT_EQ("[0x00000101-0x00000001)", TakeValue()); + + s.AddressRange(0x1, 0x101, 1); + EXPECT_EQ("[0x01-0x101)", TakeValue()); +} + TEST_F(StreamTest, ChangingByteOrder) { s.SetByteOrder(lldb::eByteOrderPDP); EXPECT_EQ(lldb::eByteOrderPDP, s.GetByteOrder()); @@ -295,24 +387,6 @@ TEST_F(StreamTest, ShiftOperatorStrings) { EXPECT_EQ("cstring\nllvm::StringRef\n", TakeValue()); } -TEST_F(StreamTest, ShiftOperatorInts) { - s << std::numeric_limits::max() << " "; - s << std::numeric_limits::max() << " "; - s << std::numeric_limits::max() << " "; - s << std::numeric_limits::max(); - EXPECT_EQ(40U, s.GetWrittenBytes()); - EXPECT_EQ("127 32767 
2147483647 9223372036854775807", TakeValue()); -} - -TEST_F(StreamTest, ShiftOperatorUInts) { - s << std::numeric_limits::max() << " "; - s << std::numeric_limits::max() << " "; - s << std::numeric_limits::max() << " "; - s << std::numeric_limits::max(); - EXPECT_EQ(33U, s.GetWrittenBytes()); - EXPECT_EQ("ff ffff ffffffff ffffffffffffffff", TakeValue()); -} - TEST_F(StreamTest, ShiftOperatorPtr) { // This test is a bit tricky because pretty much everything related to // pointer printing seems to lead to UB or IB. So let's make the most basic diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index b1a51b332ff0d..1479e29b4a3ac 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -530,10 +530,6 @@ option(LLVM_BUILD_EXAMPLES "Build the LLVM example programs. If OFF, just generate build targets." OFF) option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON) -if(LLVM_BUILD_EXAMPLES) - add_definitions(-DBUILD_EXAMPLES) -endif(LLVM_BUILD_EXAMPLES) - option(LLVM_BUILD_TESTS "Build LLVM unit tests. If OFF, just generate build targets." OFF) option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON) diff --git a/llvm/bindings/go/llvm/dibuilder.go b/llvm/bindings/go/llvm/dibuilder.go index e845369271602..10e18e14d9895 100644 --- a/llvm/bindings/go/llvm/dibuilder.go +++ b/llvm/bindings/go/llvm/dibuilder.go @@ -504,6 +504,7 @@ type DITypedef struct { File Metadata Line int Context Metadata + AlignInBits uint32 } // CreateTypedef creates typedef type debug metadata. @@ -518,6 +519,7 @@ func (d *DIBuilder) CreateTypedef(t DITypedef) Metadata { t.File.C, C.unsigned(t.Line), t.Context.C, + C.uint32_t(t.AlignInBits), ) return Metadata{C: result} } diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 028a2cc86bf38..e7e5e5dcf2ff3 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -166,7 +166,6 @@ if(NOT LLVM_USE_SANITIZER MATCHES "Memory.*") else() include_directories(${LIBXML2_INCLUDE_DIR}) endif() - set(LIBXML2_LIBS "xml2") endif() endif() endif() diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index 7fdca536c1fdb..082393212b674 100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -35,6 +35,8 @@ set(TARGET_TRIPLE "@TARGET_TRIPLE@") set(LLVM_ABI_BREAKING_CHECKS @LLVM_ABI_BREAKING_CHECKS@) +set(LLVM_ENABLE_EXPENSIVE_CHECKS @LLVM_ENABLE_EXPENSIVE_CHECKS@) + set(LLVM_ENABLE_ASSERTIONS @LLVM_ENABLE_ASSERTIONS@) set(LLVM_ENABLE_EH @LLVM_ENABLE_EH@) diff --git a/llvm/docs/CommandGuide/lit.rst b/llvm/docs/CommandGuide/lit.rst index ff70a21b8dfcc..40aeecdf2c81a 100644 --- a/llvm/docs/CommandGuide/lit.rst +++ b/llvm/docs/CommandGuide/lit.rst @@ -406,17 +406,38 @@ PRE-DEFINED SUBSTITUTIONS :program:`lit` provides various patterns that can be used with the RUN command. These are defined in TestRunner.py. 
The base set of substitutions are:

- ========== ==============
- Macro Substitution
- ========== ==============
- %s source path (path to the file currently being run)
- %S source dir (directory of the file currently being run)
- %p same as %S
- %{pathsep} path separator
- %t temporary file name unique to the test
- %T parent directory of %t (not unique, deprecated, do not use)
- %% %
- ========== ==============
+ ======================= ==============
+ Macro Substitution
+ ======================= ==============
+ %s source path (path to the file currently being run)
+ %S source dir (directory of the file currently being run)
+ %p same as %S
+ %{pathsep} path separator
+ %t temporary file name unique to the test
+ %basename_t the last path component of %t but without the ``.tmp`` extension
+ %T parent directory of %t (not unique, deprecated, do not use)
+ %% %
+ %/s %s but ``\`` is replaced by ``/``
+ %/S %S but ``\`` is replaced by ``/``
+ %/p %p but ``\`` is replaced by ``/``
+ %/t %t but ``\`` is replaced by ``/``
+ %/T %T but ``\`` is replaced by ``/``
+ %{/s:regex_replacement} %/s but escaped for use in the replacement of a ``s@@@`` command in sed
+ %{/S:regex_replacement} %/S but escaped for use in the replacement of a ``s@@@`` command in sed
+ %{/p:regex_replacement} %/p but escaped for use in the replacement of a ``s@@@`` command in sed
+ %{/t:regex_replacement} %/t but escaped for use in the replacement of a ``s@@@`` command in sed
+ %{/T:regex_replacement} %/T but escaped for use in the replacement of a ``s@@@`` command in sed
+ %:s On Windows, %/s but a ``:`` is removed if it's the second character.
+ Otherwise, %s but with a single leading ``/`` removed.
+ %:S On Windows, %/S but a ``:`` is removed if it's the second character.
+ Otherwise, %S but with a single leading ``/`` removed.
+ %:p On Windows, %/p but a ``:`` is removed if it's the second character.
+ Otherwise, %p but with a single leading ``/`` removed.
+ %:t On Windows, %/t but a ``:`` is removed if it's the second character.
+ Otherwise, %t but with a single leading ``/`` removed.
+ %:T On Windows, %/T but a ``:`` is removed if it's the second character.
+ Otherwise, %T but with a single leading ``/`` removed.
+ ======================= ==============

 Other substitutions are provided that are variations on this base set and
 further substitution patterns can be defined by each test module. See the
diff --git a/llvm/docs/Contributing.rst b/llvm/docs/Contributing.rst
index 67adc45e1dcc0..2ad0d9080e12d 100644
--- a/llvm/docs/Contributing.rst
+++ b/llvm/docs/Contributing.rst
@@ -45,7 +45,6 @@ you are interested in working on any of these projects, please send a mail to
 the `LLVM Developer's mailing list`_, so that we know the project is being
 worked on.

-
 How to Submit a Patch
 =====================
 Once you have a patch ready, it is time to submit it. The patch should:
@@ -55,6 +54,35 @@ Once you have a patch ready, it is time to submit it. The patch should:
 * not contain any unrelated changes
 * be an isolated change. Independent changes should be submitted as separate
   patches as this makes reviewing easier.

+.. _format patches:
+
+Before sending a patch for review, please also try to ensure it is
+formatted properly. We use ``clang-format`` for this, which has git integration
+through the ``git-clang-format`` script. On some systems, it may already be
+installed (or be installable via your package manager). If so, you can simply
+run it -- the following command will format only the code changed in the most
+recent commit:
+
+.. code-block:: console
+
+  % git clang-format HEAD~1
+
+Note that this modifies the files, but doesn't commit them -- you'll likely want
+to run
+
+.. code-block:: console
+
+  % git commit --amend -a
+
+in order to update the last commit with all pending changes.
+
+.. note::
+  If you don't already have ``clang-format`` or ``git clang-format`` installed
+  on your system, the ``clang-format`` binary will be built alongside clang, and
+  the git integration can be run from
+  ``clang/tools/clang-format/git-clang-format``.
+
+
 To get a patch accepted, it has to be reviewed by the LLVM community. This can
 be done using `LLVM's Phabricator`_ or the llvm-commits mailing list.
 Please follow :ref:`Phabricator#requesting-a-review-via-the-web-interface
 `
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 52f8e392ce86d..aa37e00b50563 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3334,6 +3334,9 @@ Poison value behavior is defined in terms of value *dependence*:
   be different if the terminator had transferred control to a different
   successor.
 - Dependence is transitive.
+- Vector elements may be independently poisoned. Therefore, transforms
+  on instructions such as shufflevector must be careful to propagate
+  poison across values or elements only as allowed by the original code.

 An instruction that *depends* on a poison value, produces a poison
 value itself. A poison value may be relaxed into an
@@ -8448,10 +8451,13 @@ Semantics:
 The elements of the two input vectors are numbered from left to right
 across both of the vectors. The shuffle mask operand specifies, for each
 element of the result vector, which element of the two input vectors the
-result element gets. If the shuffle mask is undef, the result vector is
-undef. If any element of the mask operand is undef, that element of the
-result is undef. If the shuffle mask selects an undef element from one
-of the input vectors, the resulting element is undef.
+result element gets.
+
+If the shuffle mask is undef, the result vector is undef. If any element
+of the mask operand is undef, that element of the result is undef. If the
+shuffle mask selects an undef element from one of the input vectors, the
+resulting element is undef. An undef mask element prevents a poisoned
+vector element from propagating.

 For scalable vectors, the only valid mask values at present are
 ``zeroinitializer`` and ``undef``, since we cannot write all indices as
diff --git a/llvm/docs/Phabricator.rst b/llvm/docs/Phabricator.rst
index ca23ab3f13078..7de8dc1e6a0bf 100644
--- a/llvm/docs/Phabricator.rst
+++ b/llvm/docs/Phabricator.rst
@@ -62,6 +62,9 @@ to upload your patch):
 * ``git format-patch -U999999 @{u}``
 * ``svn diff --diff-cmd=diff -x -U999999``

+Before uploading your patch, please make sure it is formatted properly, as
+described in :ref:`How to Submit a Patch <format patches>`.
+
 To upload a new patch:

 * Click *Differential*.
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index e85a85053fb96..c27f3bc8b692e 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -166,6 +166,16 @@ Changes to the OCaml bindings

 Changes to the C API
 --------------------

+* The C DebugInfo API ``LLVMDIBuilderCreateTypedef`` now takes an extra
+  ``AlignInBits`` argument, so that alignment information specified on a
+  ``typedef`` is propagated to the debug information in LLVM IR.
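To make the new parameter concrete, here is a minimal, hedged sketch of a call
site for the updated C API (an editor's illustration, not part of the patch:
the builder, type, file, and scope handles are assumed to exist elsewhere, and
the typedef name, line number, and 64-bit alignment are hypothetical values):

.. code-block:: c

   #include <string.h>
   #include "llvm-c/DebugInfo.h"

   /* Hedged sketch: DIB, Ty, File, and Scope are assumed to have been created
      by the caller; "my_int", line 10, and the 64-bit alignment are example
      values, not taken from the patch. */
   static LLVMMetadataRef createAlignedTypedef(LLVMDIBuilderRef DIB,
                                               LLVMMetadataRef Ty,
                                               LLVMMetadataRef File,
                                               LLVMMetadataRef Scope) {
     const char *Name = "my_int";
     return LLVMDIBuilderCreateTypedef(DIB, Ty, Name, strlen(Name), File,
                                       /*LineNo=*/10, Scope,
                                       /*AlignInBits=*/64);
   }

The Go binding change below mirrors this by adding an ``AlignInBits`` field to
the ``DITypedef`` descriptor.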
+
+
+Changes to the Go bindings
+--------------------------
+* The Go DebugInfo API ``CreateTypedef`` now takes an extra ``AlignInBits``
+  argument, so that alignment information specified on a ``typedef`` is
+  propagated to the debug information in LLVM IR.

 Changes to the DAG infrastructure
diff --git a/llvm/examples/CMakeLists.txt b/llvm/examples/CMakeLists.txt
index 1fbcbf793b2da..ad99d4c7e3127 100644
--- a/llvm/examples/CMakeLists.txt
+++ b/llvm/examples/CMakeLists.txt
@@ -2,7 +2,6 @@ add_subdirectory(BrainF)
 add_subdirectory(Fibonacci)
 add_subdirectory(HowToUseJIT)
 add_subdirectory(HowToUseLLJIT)
-add_subdirectory(IRTransforms)
 add_subdirectory(LLJITExamples)
 add_subdirectory(Kaleidoscope)
 add_subdirectory(ModuleMaker)
diff --git a/llvm/examples/IRTransforms/CMakeLists.txt b/llvm/examples/IRTransforms/CMakeLists.txt
deleted file mode 100644
index 1c3185eed5ff2..0000000000000
--- a/llvm/examples/IRTransforms/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-set(LLVM_LINK_COMPONENTS
-  Analysis
-  Core
-  Support
-  )
-
-add_llvm_library(ExampleIRTransforms
-  InitializePasses.cpp
-  SimplifyCFG.cpp
-
-  ADDITIONAL_HEADER_DIRS
-
-  DEPENDS
-  intrinsics_gen
-  )
diff --git a/llvm/examples/IRTransforms/InitializePasses.cpp b/llvm/examples/IRTransforms/InitializePasses.cpp
deleted file mode 100644
index 125180715cd41..0000000000000
--- a/llvm/examples/IRTransforms/InitializePasses.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- InitializePasses.cpp ----------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements implements the initialization hook for the example
-// transforms.
-//
-//===----------------------------------------------------------------------===//
-
-#include "InitializePasses.h"
-#include "llvm/PassRegistry.h"
-
-using namespace llvm;
-
-void initializeExampleIRTransforms(PassRegistry &Registry) {
-  initializeSimplifyCFGLegacyPassPass(Registry);
-}
diff --git a/llvm/examples/IRTransforms/InitializePasses.h b/llvm/examples/IRTransforms/InitializePasses.h
deleted file mode 100644
index 8b6673d518e63..0000000000000
--- a/llvm/examples/IRTransforms/InitializePasses.h
+++ /dev/null
@@ -1,22 +0,0 @@
-//===- InitializePasses.h - -------------------------------------*- C++ -*-===//
-//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXAMPLES_IRTRANSFORMS_INITIALIZEPASSES__H -#define LLVM_EXAMPLES_IRTRANSFORMS_INITIALIZEPASSES__H - -#include "llvm/IR/PassManager.h" - -namespace llvm { - -void initializeExampleIRTransforms(PassRegistry &Registry); -void initializeSimplifyCFGLegacyPassPass(PassRegistry &Registry); - -} // end namespace llvm - -#endif diff --git a/llvm/examples/IRTransforms/SimplifyCFG.cpp b/llvm/examples/IRTransforms/SimplifyCFG.cpp deleted file mode 100644 index 10658c9f09590..0000000000000 --- a/llvm/examples/IRTransforms/SimplifyCFG.cpp +++ /dev/null @@ -1,414 +0,0 @@ -//===- SimplifyCFG.cpp ----------------------------------------------------===// -// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the control flow graph (CFG) simplifications -// presented as part of the 'Getting Started With LLVM: Basics' tutorial at the -// US LLVM Developers Meeting 2019. It also contains additional material. -// -// The current file contains three different CFG simplifications. There are -// multiple versions of each implementation (e.g. _v1 and _v2), which implement -// additional functionality (e.g. preserving analysis like the DominatorTree) or -// use additional utilities to simplify the code (e.g. LLVM's PatternMatch.h). -// The available simplifications are: -// 1. Trivially Dead block Removal (removeDeadBlocks_v[1,2]). -// This simplifications removes all blocks without predecessors in the CFG -// from a function. -// 2. Conditional Branch Elimination (eliminateCondBranches_v[1,2,3]) -// This simplification replaces conditional branches with constant integer -// conditions with unconditional branches. -// 3. Single Predecessor Block Merging (mergeIntoSinglePredecessor_v[1,2]) -// This simplification merges blocks with a single predecessor into the -// predecessor, if that block has a single successor. -// -// TODOs -// * Hook up pass to the new pass manager. -// * Preserve LoopInfo. -// * Add fixed point iteration to delete all dead blocks -// * Add implementation using reachability to discover dead blocks. -//===----------------------------------------------------------------------===// - -#include "SimplifyCFG.h" -#include "InitializePasses.h" -#include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/PassManager.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/InitializePasses.h" -#include "llvm/Support/CommandLine.h" - -using namespace llvm; -using namespace PatternMatch; - -enum TutorialVersion { V1, V2, V3 }; -static cl::opt - Version("tut-simplifycfg-version", cl::desc("Select tutorial version"), - cl::Hidden, cl::ValueOptional, cl::init(V1), - cl::values(clEnumValN(V1, "v1", "version 1"), - clEnumValN(V2, "v2", "version 2"), - clEnumValN(V3, "v3", "version 3"), - // Sentinel value for unspecified option. - clEnumValN(V3, "", ""))); - -#define DEBUG_TYPE "tut-simplifycfg" - -// Remove trivially dead blocks. First version, not preserving the -// DominatorTree. -static bool removeDeadBlocks_v1(Function &F) { - bool Changed = false; - - // Remove trivially dead blocks. 
- for (BasicBlock &BB : make_early_inc_range(F)) { - // Skip blocks we know to not be trivially dead. We know a block is - // guaranteed to be dead, iff it is neither the entry block nor - // has any predecessors. - if (&F.getEntryBlock() == &BB || !pred_empty(&BB)) - continue; - - // Notify successors of BB that BB is going to be removed. This removes - // incoming values from BB from PHIs in the successors. Note that this will - // not actually remove BB from the predecessor lists of its successors. - for (BasicBlock *Succ : successors(&BB)) - Succ->removePredecessor(&BB); - // TODO: Find a better place to put such small variations. - // Alternatively, we can update the PHI nodes manually: - // for (PHINode &PN : make_early_inc_range(Succ->phis())) - // PN.removeIncomingValue(&BB); - - // Replace all instructions in BB with an undef constant. The block is - // unreachable, so the results of the instructions should never get used. - while (!BB.empty()) { - Instruction &I = BB.back(); - I.replaceAllUsesWith(UndefValue::get(I.getType())); - I.eraseFromParent(); - } - - // Finally remove the basic block. - BB.eraseFromParent(); - Changed = true; - } - - return Changed; -} - -// Remove trivially dead blocks. This is the second version and preserves the -// dominator tree. -static bool removeDeadBlocks_v2(Function &F, DominatorTree &DT) { - bool Changed = false; - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - SmallVector DTUpdates; - - // Remove trivially dead blocks. - for (BasicBlock &BB : make_early_inc_range(F)) { - // Skip blocks we know to not be trivially dead. We know a block is - // guaranteed to be dead, iff it is neither the entry block nor - // has any predecessors. - if (&F.getEntryBlock() == &BB || !pred_empty(&BB)) - continue; - - // Notify successors of BB that BB is going to be removed. This removes - // incoming values from BB from PHIs in the successors. Note that this will - // not actually remove BB from the predecessor lists of its successors. - for (BasicBlock *Succ : successors(&BB)) { - Succ->removePredecessor(&BB); - - // Collect updates that need to be applied to the dominator tree. - DTUpdates.push_back({DominatorTree::Delete, &BB, Succ}); - } - - // Remove BB via the DomTreeUpdater. DomTreeUpdater::deleteBB conveniently - // removes the instructions in BB as well. - DTU.deleteBB(&BB); - Changed = true; - } - - // Apply updates permissively, to remove duplicates. - DTU.applyUpdatesPermissive(DTUpdates); - - return Changed; -} - -// Eliminate branches with constant conditionals. This is the first version, -// which *does not* preserve the dominator tree. -static bool eliminateCondBranches_v1(Function &F) { - bool Changed = false; - - // Eliminate branches with constant conditionals. - for (BasicBlock &BB : F) { - // Skip blocks without conditional branches as terminators. - BranchInst *BI = dyn_cast(BB.getTerminator()); - if (!BI || !BI->isConditional()) - continue; - - // Skip blocks with conditional branches without ConstantInt conditions. - ConstantInt *CI = dyn_cast(BI->getCondition()); - if (!CI) - continue; - - // We use the branch condition (CI), to select the successor we remove: - // if CI == 1 (true), we remove the second successor, otherwise the first. - BasicBlock *RemovedSucc = BI->getSuccessor(CI->isOne()); - // Tell RemovedSucc we will remove BB from its predecessors. 
- RemovedSucc->removePredecessor(&BB); - - // Replace the conditional branch with an unconditional one, by creating - // a new unconditional branch to the selected successor and removing the - // conditional one. - BranchInst::Create(BI->getSuccessor(CI->isZero()), BI); - BI->eraseFromParent(); - Changed = true; - } - - return Changed; -} - -// Eliminate branches with constant conditionals. This is the second -// version, which *does* preserve the dominator tree. -static bool eliminateCondBranches_v2(Function &F, DominatorTree &DT) { - bool Changed = false; - - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - SmallVector DTUpdates; - // Eliminate branches with constant conditionals. - for (BasicBlock &BB : F) { - // Skip blocks without conditional branches as terminators. - BranchInst *BI = dyn_cast(BB.getTerminator()); - if (!BI || !BI->isConditional()) - continue; - - // Skip blocks with conditional branches without ConstantInt conditions. - ConstantInt *CI = dyn_cast(BI->getCondition()); - if (!CI) - continue; - - // We use the branch condition (CI), to select the successor we remove: - // if CI == 1 (true), we remove the second successor, otherwise the first. - BasicBlock *RemovedSucc = BI->getSuccessor(CI->isOne()); - // Tell RemovedSucc we will remove BB from its predecessors. - RemovedSucc->removePredecessor(&BB); - - // Replace the conditional branch with an unconditional one, by creating - // a new unconditional branch to the selected successor and removing the - // conditional one. - BranchInst *NewBranch = - BranchInst::Create(BI->getSuccessor(CI->isZero()), BI); - BI->eraseFromParent(); - - // Delete the edge between BB and RemovedSucc in the DominatorTree, iff - // the conditional branch did not use RemovedSucc as both the true and false - // branches. - if (NewBranch->getSuccessor(0) != RemovedSucc) - DTUpdates.push_back({DominatorTree::Delete, &BB, RemovedSucc}); - Changed = true; - } - - // Apply updates permissively, to remove duplicates. - DTU.applyUpdatesPermissive(DTUpdates); - - return Changed; -} - -// Eliminate branches with constant conditionals. This is the third -// version, which uses PatternMatch.h. -static bool eliminateCondBranches_v3(Function &F, DominatorTree &DT) { - bool Changed = false; - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - SmallVector DTUpdates; - - // Eliminate branches with constant conditionals. - for (BasicBlock &BB : F) { - ConstantInt *CI = nullptr; - BasicBlock *TakenSucc, *RemovedSucc; - // Check if the terminator is a conditional branch, with constant integer - // condition and also capture the successor blocks as TakenSucc and - // RemovedSucc. - if (!match(BB.getTerminator(), - m_Br(m_ConstantInt(CI), m_BasicBlock(TakenSucc), - m_BasicBlock(RemovedSucc)))) - continue; - - // If the condition is false, swap TakenSucc and RemovedSucc. - if (CI->isZero()) - std::swap(TakenSucc, RemovedSucc); - - // Tell RemovedSucc we will remove BB from its predecessors. - RemovedSucc->removePredecessor(&BB); - - // Replace the conditional branch with an unconditional one, by creating - // a new unconditional branch to the selected successor and removing the - // conditional one. - - BranchInst *NewBranch = BranchInst::Create(TakenSucc, BB.getTerminator()); - BB.getTerminator()->eraseFromParent(); - - // Delete the edge between BB and RemovedSucc in the DominatorTree, iff - // the conditional branch did not use RemovedSucc as both the true and false - // branches. 
- if (NewBranch->getSuccessor(0) != RemovedSucc) - DTUpdates.push_back({DominatorTree::Delete, &BB, RemovedSucc}); - Changed = true; - } - - // Apply updates permissively, to remove duplicates. - DTU.applyUpdatesPermissive(DTUpdates); - return Changed; -} - -// Merge basic blocks into their single predecessor, if their predecessor has a -// single successor. This is the first version and does not preserve the -// DominatorTree. -static bool mergeIntoSinglePredecessor_v1(Function &F) { - bool Changed = false; - - // Merge blocks with single predecessors. - for (BasicBlock &BB : make_early_inc_range(F)) { - BasicBlock *Pred = BB.getSinglePredecessor(); - // Make sure BB has a single predecessor Pred and BB is the single - // successor of Pred. - if (!Pred || Pred->getSingleSuccessor() != &BB) - continue; - - // Do not try to merge self loops. That can happen in dead blocks. - if (Pred == &BB) - continue; - - // Need to replace it before nuking the branch. - BB.replaceAllUsesWith(Pred); - // PHI nodes in BB can only have a single incoming value. Remove them. - for (PHINode &PN : make_early_inc_range(BB.phis())) { - PN.replaceAllUsesWith(PN.getIncomingValue(0)); - PN.eraseFromParent(); - } - // Move all instructions from BB to Pred. - for (Instruction &I : make_early_inc_range(BB)) - I.moveBefore(Pred->getTerminator()); - - // Remove the Pred's terminator (which jumped to BB). BB's terminator - // will become Pred's terminator. - Pred->getTerminator()->eraseFromParent(); - BB.eraseFromParent(); - - Changed = true; - } - - return Changed; -} - -// Merge basic blocks into their single predecessor, if their predecessor has a -// single successor. This is the second version and does preserve the -// DominatorTree. -static bool mergeIntoSinglePredecessor_v2(Function &F, DominatorTree &DT) { - bool Changed = false; - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - SmallVector DTUpdates; - - // Merge blocks with single predecessors. - for (BasicBlock &BB : make_early_inc_range(F)) { - BasicBlock *Pred = BB.getSinglePredecessor(); - // Make sure BB has a single predecessor Pred and BB is the single - // successor of Pred. - if (!Pred || Pred->getSingleSuccessor() != &BB) - continue; - - // Do not try to merge self loops. That can happen in dead blocks. - if (Pred == &BB) - continue; - - // Tell DTU about the changes to the CFG: All edges from BB to its - // successors get removed and we add edges between Pred and BB's successors. - for (BasicBlock *Succ : successors(&BB)) { - DTUpdates.push_back({DominatorTree::Delete, &BB, Succ}); - DTUpdates.push_back({DominatorTree::Insert, Pred, Succ}); - } - // Also remove the edge between Pred and BB. - DTUpdates.push_back({DominatorTree::Delete, Pred, &BB}); - - // Need to replace it before nuking the branch. - BB.replaceAllUsesWith(Pred); - // PHI nodes in BB can only have a single incoming value. Remove them. - for (PHINode &PN : make_early_inc_range(BB.phis())) { - PN.replaceAllUsesWith(PN.getIncomingValue(0)); - PN.eraseFromParent(); - } - // Move all instructions from BB to Pred. - for (Instruction &I : make_early_inc_range(BB)) - I.moveBefore(Pred->getTerminator()); - - // Remove the Pred's terminator (which jumped to BB). BB's terminator - // will become Pred's terminator. - Pred->getTerminator()->eraseFromParent(); - DTU.deleteBB(&BB); - - Changed = true; - } - - // Apply updates permissively, to remove duplicates. 
- DTU.applyUpdatesPermissive(DTUpdates); - return Changed; -} - -static bool doSimplify_v1(Function &F) { - return eliminateCondBranches_v1(F) & mergeIntoSinglePredecessor_v1(F) & - removeDeadBlocks_v1(F); -} - -static bool doSimplify_v2(Function &F, DominatorTree &DT) { - return eliminateCondBranches_v2(F, DT) & - mergeIntoSinglePredecessor_v2(F, DT) & removeDeadBlocks_v2(F, DT); -} - -static bool doSimplify_v3(Function &F, DominatorTree &DT) { - return eliminateCondBranches_v3(F, DT) & - mergeIntoSinglePredecessor_v2(F, DT) & removeDeadBlocks_v2(F, DT); -} - -namespace { -struct SimplifyCFGLegacyPass : public FunctionPass { - static char ID; - SimplifyCFGLegacyPass() : FunctionPass(ID) { - initializeSimplifyCFGLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - // Version 1 of the implementation does not preserve the dominator tree. - if (Version != V1) - AU.addPreserved(); - - FunctionPass::getAnalysisUsage(AU); - } - - bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; - - switch (Version) { - case V1: - return doSimplify_v1(F); - case V2: { - auto &DT = getAnalysis().getDomTree(); - return doSimplify_v2(F, DT); - } - case V3: { - auto &DT = getAnalysis().getDomTree(); - return doSimplify_v3(F, DT); - } - } - - llvm_unreachable("Unsupported version"); - } -}; -} // namespace - -char SimplifyCFGLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(SimplifyCFGLegacyPass, DEBUG_TYPE, - "Tutorial CFG simplification", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(SimplifyCFGLegacyPass, DEBUG_TYPE, - "Tutorial CFG simplifications", false, false) diff --git a/llvm/examples/IRTransforms/SimplifyCFG.h b/llvm/examples/IRTransforms/SimplifyCFG.h deleted file mode 100644 index 09328afb01d36..0000000000000 --- a/llvm/examples/IRTransforms/SimplifyCFG.h +++ /dev/null @@ -1,24 +0,0 @@ -//===- SimplifyCFG.h - Tutorial SimplifyCFG ---------------------*- C++ -*-===// -// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXAMPLES_IRTRANSFORMS_SIMPLIFYCFG__H -#define LLVM_EXAMPLES_IRTRANSFORMS_SIMPLIFYCFG__H - -#include "llvm/Pass.h" -#include "llvm/PassRegistry.h" - -namespace llvm { - -FunctionPass *createSimplifyCFGPass(); - -void initializeSimplifyCFGLegacyPassPass(PassRegistry &); - -} // end namespace llvm - -#endif // LLVM_EXAMPLES_IRTRANSFORMS_SIMPLIFYCFG__H diff --git a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h index a7fa3afc470cd..020b72c23947a 100644 --- a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h +++ b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h @@ -39,14 +39,17 @@ class KaleidoscopeJIT { MangleAndInterner Mangle; ThreadSafeContext Ctx; + JITDylib &MainJD; + public: KaleidoscopeJIT(JITTargetMachineBuilder JTMB, DataLayout DL) : ObjectLayer(ES, []() { return std::make_unique(); }), CompileLayer(ES, ObjectLayer, ConcurrentIRCompiler(std::move(JTMB))), DL(std::move(DL)), Mangle(ES, this->DL), - Ctx(std::make_unique()) { - ES.getMainJITDylib().addGenerator( + Ctx(std::make_unique()), + MainJD(ES.createJITDylib("
")) { + MainJD.addGenerator( cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess( DL.getGlobalPrefix()))); } @@ -69,12 +72,11 @@ class KaleidoscopeJIT { LLVMContext &getContext() { return *Ctx.getContext(); } Error addModule(std::unique_ptr M) { - return CompileLayer.add(ES.getMainJITDylib(), - ThreadSafeModule(std::move(M), Ctx)); + return CompileLayer.add(MainJD, ThreadSafeModule(std::move(M), Ctx)); } Expected lookup(StringRef Name) { - return ES.lookup({&ES.getMainJITDylib()}, Mangle(Name.str())); + return ES.lookup({&MainJD}, Mangle(Name.str())); } }; diff --git a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h index e9999efd37a51..8037e58ae4f72 100644 --- a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h +++ b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h @@ -45,15 +45,17 @@ class KaleidoscopeJIT { MangleAndInterner Mangle; ThreadSafeContext Ctx; + JITDylib &MainJD; + public: KaleidoscopeJIT(JITTargetMachineBuilder JTMB, DataLayout DL) : ObjectLayer(ES, []() { return std::make_unique(); }), CompileLayer(ES, ObjectLayer, ConcurrentIRCompiler(std::move(JTMB))), - OptimizeLayer(ES, CompileLayer, optimizeModule), - DL(std::move(DL)), Mangle(ES, this->DL), - Ctx(std::make_unique()) { - ES.getMainJITDylib().addGenerator( + OptimizeLayer(ES, CompileLayer, optimizeModule), DL(std::move(DL)), + Mangle(ES, this->DL), Ctx(std::make_unique()), + MainJD(ES.createJITDylib("
")) { + MainJD.addGenerator( cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess( DL.getGlobalPrefix()))); } @@ -76,12 +78,11 @@ class KaleidoscopeJIT { } Error addModule(std::unique_ptr M) { - return OptimizeLayer.add(ES.getMainJITDylib(), - ThreadSafeModule(std::move(M), Ctx)); + return OptimizeLayer.add(MainJD, ThreadSafeModule(std::move(M), Ctx)); } Expected lookup(StringRef Name) { - return ES.lookup({&ES.getMainJITDylib()}, Mangle(Name.str())); + return ES.lookup({&MainJD}, Mangle(Name.str())); } private: diff --git a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp index 1fd1fc92a73f9..f4cfb7403dbd7 100644 --- a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp +++ b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp @@ -76,12 +76,12 @@ class SpeculativeJIT { ExecutionSession &getES() { return *ES; } - Error addModule(JITDylib &JD, ThreadSafeModule TSM) { - return CODLayer.add(JD, std::move(TSM)); + Error addModule(ThreadSafeModule TSM) { + return CODLayer.add(MainJD, std::move(TSM)); } Expected lookup(StringRef UnmangledName) { - return ES->lookup({&ES->getMainJITDylib()}, Mangle(UnmangledName)); + return ES->lookup({&MainJD}, Mangle(UnmangledName)); } ~SpeculativeJIT() { CompileThreads.wait(); } @@ -101,15 +101,15 @@ class SpeculativeJIT { std::unique_ptr LCTMgr, IndirectStubsManagerBuilderFunction ISMBuilder, std::unique_ptr ProcessSymbolsGenerator) - : ES(std::move(ES)), DL(std::move(DL)), LCTMgr(std::move(LCTMgr)), + : ES(std::move(ES)), DL(std::move(DL)), + MainJD(this->ES->createJITDylib("
")), LCTMgr(std::move(LCTMgr)), CompileLayer(*this->ES, ObjLayer, ConcurrentIRCompiler(std::move(JTMB))), S(Imps, *this->ES), SpeculateLayer(*this->ES, CompileLayer, S, Mangle, BlockFreqQuery()), CODLayer(*this->ES, SpeculateLayer, *this->LCTMgr, std::move(ISMBuilder)) { - this->ES->getMainJITDylib().addGenerator( - std::move(ProcessSymbolsGenerator)); + MainJD.addGenerator(std::move(ProcessSymbolsGenerator)); this->CODLayer.setImplMap(&Imps); this->ES->setDispatchMaterialization( @@ -119,9 +119,9 @@ class SpeculativeJIT { auto Work = [SharedMU, &JD]() { SharedMU->doMaterialize(JD); }; CompileThreads.async(std::move(Work)); }); - ExitOnErr(S.addSpeculationRuntime(this->ES->getMainJITDylib(), Mangle)); + ExitOnErr(S.addSpeculationRuntime(MainJD, Mangle)); LocalCXXRuntimeOverrides CXXRuntimeoverrides; - ExitOnErr(CXXRuntimeoverrides.enable(this->ES->getMainJITDylib(), Mangle)); + ExitOnErr(CXXRuntimeoverrides.enable(MainJD, Mangle)); } static std::unique_ptr createMemMgr() { @@ -133,6 +133,8 @@ class SpeculativeJIT { MangleAndInterner Mangle{*ES, DL}; ThreadPool CompileThreads{NumThreads}; + JITDylib &MainJD; + Triple TT; std::unique_ptr LCTMgr; IRCompileLayer CompileLayer; @@ -172,24 +174,14 @@ int main(int argc, char *argv[]) { return 1; } - ExitOnErr(SJ->addModule(SJ->getES().getMainJITDylib(), - ThreadSafeModule(std::move(M), std::move(Ctx)))); + ExitOnErr(SJ->addModule(ThreadSafeModule(std::move(M), std::move(Ctx)))); } - // Build an argv array for the JIT'd main. - std::vector ArgV; - ArgV.push_back(argv[0]); - for (const auto &InputArg : InputArgv) - ArgV.push_back(InputArg.data()); - ArgV.push_back(nullptr); - - // Look up the JIT'd main, cast it to a function pointer, then call it. - auto MainSym = ExitOnErr(SJ->lookup("main")); - int (*Main)(int, const char *[]) = - (int (*)(int, const char *[]))MainSym.getAddress(); + auto Main = + jitTargetAddressToFunction(MainSym.getAddress()); - Main(ArgV.size() - 1, ArgV.data()); + return runAsMain(Main, InputArgv, StringRef(InputFiles.front())); return 0; } diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index ab60b88a31f46..731f32741e191 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -874,7 +874,7 @@ LLVMMetadataRef LLVMDIBuilderCreateTypedef(LLVMDIBuilderRef Builder, LLVMMetadataRef Type, const char *Name, size_t NameLen, LLVMMetadataRef File, unsigned LineNo, - LLVMMetadataRef Scope); + LLVMMetadataRef Scope, uint32_t AlignInBits); /** * Create debugging information entry to establish inheritance relationship diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 70fc19e82b3c7..afeed67e3f9e8 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -851,6 +851,9 @@ class APFloat : public APFloatBase { APFloat(const fltSemantics &Semantics) : U(Semantics) {} APFloat(const fltSemantics &Semantics, StringRef S); APFloat(const fltSemantics &Semantics, integerPart I) : U(Semantics, I) {} + template ::value>::type> + APFloat(const fltSemantics &Semantics, T V) = delete; // TODO: Remove this constructor. This isn't faster than the first one. APFloat(const fltSemantics &Semantics, uninitializedTag) : U(Semantics, uninitialized) {} diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index 796110f753bc0..0791a6d686a3f 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -389,6 +389,11 @@ class LLVM_NODISCARD APInt { /// \returns true if this APInt is positive. 
bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); } + /// Determine if this APInt Value is non-positive (<= 0). + /// + /// \returns true if this APInt is non-positive. + bool isNonPositive() const { return !isStrictlyPositive(); } + /// Determine if all bits are set /// /// This checks to see if the value has all bits of the APInt are set or not. diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index c8965936fb9c1..41d6c23b8d0d9 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -34,6 +34,7 @@ namespace llvm { class Function; class LoopInfo; class raw_ostream; +class PostDominatorTree; class TargetLibraryInfo; class Value; @@ -187,8 +188,10 @@ class BranchProbabilityInfo { /// Track the set of blocks that always lead to a cold call. SmallPtrSet PostDominatedByColdCall; - void updatePostDominatedByUnreachable(const BasicBlock *BB); - void updatePostDominatedByColdCall(const BasicBlock *BB); + void computePostDominatedByUnreachable(const Function &F, + PostDominatorTree *PDT); + void computePostDominatedByColdCall(const Function &F, + PostDominatorTree *PDT); bool calcUnreachableHeuristics(const BasicBlock *BB); bool calcMetadataWeights(const BasicBlock *BB); bool calcColdCallHeuristics(const BasicBlock *BB); diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h index 7a572afccd67d..22df60efd84ec 100644 --- a/llvm/include/llvm/Analysis/DDG.h +++ b/llvm/include/llvm/Analysis/DDG.h @@ -300,6 +300,7 @@ using DDGInfo = DependenceGraphInfo; /// Data Dependency Graph class DataDependenceGraph : public DDGBase, public DDGInfo { + friend AbstractDependenceGraphBuilder; friend class DDGBuilder; public: @@ -311,7 +312,7 @@ class DataDependenceGraph : public DDGBase, public DDGInfo { DataDependenceGraph(DataDependenceGraph &&G) : DDGBase(std::move(G)), DDGInfo(std::move(G)) {} DataDependenceGraph(Function &F, DependenceInfo &DI); - DataDependenceGraph(const Loop &L, DependenceInfo &DI); + DataDependenceGraph(Loop &L, LoopInfo &LI, DependenceInfo &DI); ~DataDependenceGraph(); /// If node \p N belongs to a pi-block return a pointer to the pi-block, @@ -381,6 +382,12 @@ class DDGBuilder : public AbstractDependenceGraphBuilder { return *E; } + const NodeListType &getNodesInPiBlock(const DDGNode &N) final override { + auto *PiNode = dyn_cast(&N); + assert(PiNode && "Expected a pi-block node."); + return PiNode->getNodes(); + } + bool shouldCreatePiBlocks() const final override; }; diff --git a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h index 876ccbc5c9a04..99465ef39a076 100644 --- a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h +++ b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h @@ -59,6 +59,7 @@ template class AbstractDependenceGraphBuilder { createMemoryDependencyEdges(); createAndConnectRootNode(); createPiBlocks(); + sortNodesTopologically(); } /// Create fine grained nodes. These are typically atomic nodes that @@ -84,6 +85,9 @@ template class AbstractDependenceGraphBuilder { /// the dependence graph into an acyclic graph. void createPiBlocks(); + /// Topologically sort the graph nodes. + void sortNodesTopologically(); + protected: /// Create the root node of the graph. virtual NodeType &createRootNode() = 0; @@ -104,6 +108,10 @@ template class AbstractDependenceGraphBuilder { /// Create a rooted edge going from \p Src to \p Tgt . 
virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0; + /// Given a pi-block node, return a vector of all the nodes contained within + /// it. + virtual const NodeListType &getNodesInPiBlock(const NodeType &N) = 0; + /// Deallocate memory of edge \p E. virtual void destroyEdge(EdgeType &E) { delete &E; } diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 57f84c1d0ebf0..5286f6a220ec8 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1922,6 +1922,13 @@ class ScalarEvolutionAnalysis ScalarEvolution run(Function &F, FunctionAnalysisManager &AM); }; +/// Verifier pass for the \c ScalarEvolutionAnalysis results. +class ScalarEvolutionVerifierPass + : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + /// Printer pass for the \c ScalarEvolutionAnalysis results. class ScalarEvolutionPrinterPass : public PassInfoMixin { diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index e9c96cc96cb53..f5f805493d320 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -82,13 +82,36 @@ struct VFParameter { struct VFShape { unsigned VF; // Vectorization factor. bool IsScalable; // True if the function is a scalable function. - VFISAKind ISA; // Instruction Set Architecture. SmallVector Parameters; // List of parameter informations. // Comparison operator. bool operator==(const VFShape &Other) const { - return std::tie(VF, IsScalable, ISA, Parameters) == - std::tie(Other.VF, Other.IsScalable, Other.ISA, Other.Parameters); + return std::tie(VF, IsScalable, Parameters) == + std::tie(Other.VF, Other.IsScalable, Other.Parameters); } + + /// Update the parameter in position P.ParamPos to P. + void updateParam(VFParameter P) { + assert(P.ParamPos < Parameters.size() && "Invalid parameter position."); + Parameters[P.ParamPos] = P; + assert(hasValidParameterList() && "Invalid parameter list"); + } + + // Retrieve the basic vectorization shape of the function, where all + // parameters are mapped to VFParamKind::Vector with \p EC + // lanes. Specifies whether the function has a Global Predicate + // argument via \p HasGlobalPred. + static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) { + SmallVector Parameters; + for (unsigned I = 0; I < CI.arg_size(); ++I) + Parameters.push_back(VFParameter({I, VFParamKind::Vector})); + if (HasGlobalPred) + Parameters.push_back( + VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate})); + + return {EC.Min, EC.Scalable, Parameters}; + } + /// Sanity check on the Parameters in the VFShape. + bool hasValidParameterList() const; }; /// Holds the VFShape for a specific scalar to vector function mapping. @@ -96,11 +119,12 @@ struct VFInfo { VFShape Shape; // Classification of the vector function. StringRef ScalarName; // Scalar Function Name. StringRef VectorName; // Vector Function Name associated to this VFInfo. + VFISAKind ISA; // Instruction Set Architecture. // Comparison operator. 
bool operator==(const VFInfo &Other) const { - return std::tie(Shape, ScalarName, VectorName) == - std::tie(Shape, Other.ScalarName, Other.VectorName); + return std::tie(Shape, ScalarName, VectorName, ISA) == + std::tie(Shape, Other.ScalarName, Other.VectorName, Other.ISA); } }; diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 46edfb6260be1..caab91da9c839 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1199,8 +1199,9 @@ enum { PT_SUNW_EH_FRAME = 0x6474e550, PT_SUNW_UNWIND = 0x6464e550, - PT_GNU_STACK = 0x6474e551, // Indicates stack executability. - PT_GNU_RELRO = 0x6474e552, // Read-only after relocation. + PT_GNU_STACK = 0x6474e551, // Indicates stack executability. + PT_GNU_RELRO = 0x6474e552, // Read-only after relocation. + PT_GNU_PROPERTY = 0x6474e553, // .note.gnu.property notes sections. PT_OPENBSD_RANDOMIZE = 0x65a3dbe6, // Fill with random data. PT_OPENBSD_WXNEEDED = 0x65a3dbe7, // Program does W^X violations. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index d184530d6447d..642f8828b0f57 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -1157,6 +1157,12 @@ class LegalizerInfo { virtual bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const; + /// Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while + /// widening a constant of type SmallTy which targets can override. + /// For eg, the DAG does (SmallTy.isByteSized() ? G_SEXT : G_ZEXT) which + /// will be the default. + virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const; + private: /// Determine what action should be taken to legalize the given generic /// instruction opcode, type-index and type. 
Requires computeTables to have diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h index 503227222207f..149fe043d1f56 100644 --- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h @@ -38,6 +38,51 @@ enum class MachineCombinerPattern { MULSUBX_OP2, MULADDXI_OP1, MULSUBXI_OP1, + // NEON integers vectors + MULADDv8i8_OP1, + MULADDv8i8_OP2, + MULADDv16i8_OP1, + MULADDv16i8_OP2, + MULADDv4i16_OP1, + MULADDv4i16_OP2, + MULADDv8i16_OP1, + MULADDv8i16_OP2, + MULADDv2i32_OP1, + MULADDv2i32_OP2, + MULADDv4i32_OP1, + MULADDv4i32_OP2, + + MULSUBv8i8_OP1, + MULSUBv8i8_OP2, + MULSUBv16i8_OP1, + MULSUBv16i8_OP2, + MULSUBv4i16_OP1, + MULSUBv4i16_OP2, + MULSUBv8i16_OP1, + MULSUBv8i16_OP2, + MULSUBv2i32_OP1, + MULSUBv2i32_OP2, + MULSUBv4i32_OP1, + MULSUBv4i32_OP2, + + MULADDv4i16_indexed_OP1, + MULADDv4i16_indexed_OP2, + MULADDv8i16_indexed_OP1, + MULADDv8i16_indexed_OP2, + MULADDv2i32_indexed_OP1, + MULADDv2i32_indexed_OP2, + MULADDv4i32_indexed_OP1, + MULADDv4i32_indexed_OP2, + + MULSUBv4i16_indexed_OP1, + MULSUBv4i16_indexed_OP2, + MULSUBv8i16_indexed_OP1, + MULSUBv8i16_indexed_OP2, + MULSUBv2i32_indexed_OP1, + MULSUBv2i32_indexed_OP2, + MULSUBv4i32_indexed_OP1, + MULSUBv4i32_indexed_OP2, + // Floating Point FMULADDH_OP1, FMULADDH_OP2, diff --git a/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/llvm/include/llvm/CodeGen/MachineInstrBundle.h index 1810d23072d00..517f03e609337 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBundle.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBundle.h @@ -75,12 +75,12 @@ inline MachineBasicBlock::const_instr_iterator getBundleEnd( } //===----------------------------------------------------------------------===// -// MachineOperand iterator +// MachineBundleOperand iterator // -/// MachineOperandIteratorBase - Iterator that can visit all operands on a -/// MachineInstr, or all operands on a bundle of MachineInstrs. This class is -/// not intended to be used directly, use one of the sub-classes instead. +/// MIBundleOperandIteratorBase - Iterator that visits all operands in a bundle +/// of MachineInstrs. This class is not intended to be used directly, use one +/// of the sub-classes instead. /// /// Intended use: /// @@ -90,7 +90,10 @@ inline MachineBasicBlock::const_instr_iterator getBundleEnd( /// ... /// } /// -class MachineOperandIteratorBase { +template +class MIBundleOperandIteratorBase + : public iterator_facade_base, + std::forward_iterator_tag, ValueT> { MachineBasicBlock::instr_iterator InstrI, InstrE; MachineInstr::mop_iterator OpI, OpE; @@ -99,35 +102,34 @@ class MachineOperandIteratorBase { void advance() { while (OpI == OpE) { // Don't advance off the basic block, or into a new bundle. - if (++InstrI == InstrE || !InstrI->isInsideBundle()) + if (++InstrI == InstrE || !InstrI->isInsideBundle()) { + InstrI = InstrE; break; + } OpI = InstrI->operands_begin(); OpE = InstrI->operands_end(); } } protected: - /// MachineOperandIteratorBase - Create an iterator that visits all operands + /// MIBundleOperandIteratorBase - Create an iterator that visits all operands /// on MI, or all operands on every instruction in the bundle containing MI. /// /// @param MI The instruction to examine. - /// @param WholeBundle When true, visit all operands on the entire bundle. 
/// - explicit MachineOperandIteratorBase(MachineInstr &MI, bool WholeBundle) { - if (WholeBundle) { - InstrI = getBundleStart(MI.getIterator()); - InstrE = MI.getParent()->instr_end(); - } else { - InstrI = InstrE = MI.getIterator(); - ++InstrE; - } + explicit MIBundleOperandIteratorBase(MachineInstr &MI) { + InstrI = getBundleStart(MI.getIterator()); + InstrE = MI.getParent()->instr_end(); OpI = InstrI->operands_begin(); OpE = InstrI->operands_end(); - if (WholeBundle) - advance(); + advance(); } - MachineOperand &deref() const { return *OpI; } + /// Constructor for an iterator past the last iteration: both instruction + /// iterators point to the end of the BB and OpI == OpE. + explicit MIBundleOperandIteratorBase(MachineBasicBlock::instr_iterator InstrE, + MachineInstr::mop_iterator OpE) + : InstrI(InstrE), InstrE(InstrE), OpI(OpE), OpE(OpE) {} public: /// isValid - Returns true until all the operands have been visited. @@ -140,123 +142,148 @@ class MachineOperandIteratorBase { advance(); } + ValueT &operator*() const { return *OpI; } + ValueT *operator->() const { return &*OpI; } + + bool operator==(const MIBundleOperandIteratorBase &Arg) const { + // Iterators are equal, if InstrI matches and either OpIs match or OpI == + // OpE match for both. The second condition allows us to construct an 'end' + // iterator, without finding the last instruction in a bundle up-front. + return InstrI == Arg.InstrI && + (OpI == Arg.OpI || (OpI == OpE && Arg.OpI == Arg.OpE)); + } /// getOperandNo - Returns the number of the current operand relative to its /// instruction. /// unsigned getOperandNo() const { return OpI - InstrI->operands_begin(); } - - /// VirtRegInfo - Information about a virtual register used by a set of operands. - /// - struct VirtRegInfo { - /// Reads - One of the operands read the virtual register. This does not - /// include undef or internal use operands, see MO::readsReg(). - bool Reads; - - /// Writes - One of the operands writes the virtual register. - bool Writes; - - /// Tied - Uses and defs must use the same register. This can be because of - /// a two-address constraint, or there may be a partial redefinition of a - /// sub-register. - bool Tied; - }; - - /// Information about how a physical register Reg is used by a set of - /// operands. - struct PhysRegInfo { - /// There is a regmask operand indicating Reg is clobbered. - /// \see MachineOperand::CreateRegMask(). - bool Clobbered; - - /// Reg or one of its aliases is defined. The definition may only cover - /// parts of the register. - bool Defined; - /// Reg or a super-register is defined. The definition covers the full - /// register. - bool FullyDefined; - - /// Reg or one of its aliases is read. The register may only be read - /// partially. - bool Read; - /// Reg or a super-register is read. The full register is read. - bool FullyRead; - - /// Either: - /// - Reg is FullyDefined and all defs of reg or an overlapping - /// register are dead, or - /// - Reg is completely dead because "defined" by a clobber. - bool DeadDef; - - /// Reg is Defined and all defs of reg or an overlapping register are - /// dead. - bool PartialDeadDef; - - /// There is a use operand of reg or a super-register with kill flag set. - bool Killed; - }; - - /// analyzeVirtReg - Analyze how the current instruction or bundle uses a - /// virtual register. This function should not be called after operator++(), - /// it expects a fresh iterator. - /// - /// @param Reg The virtual register to analyze. 
- /// @param Ops When set, this vector will receive an (MI, OpNum) entry for - /// each operand referring to Reg. - /// @returns A filled-in RegInfo struct. - VirtRegInfo analyzeVirtReg(unsigned Reg, - SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops = nullptr); - - /// analyzePhysReg - Analyze how the current instruction or bundle uses a - /// physical register. This function should not be called after operator++(), - /// it expects a fresh iterator. - /// - /// @param Reg The physical register to analyze. - /// @returns A filled-in PhysRegInfo struct. - PhysRegInfo analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI); }; -/// MIOperands - Iterate over operands of a single instruction. +/// MIBundleOperands - Iterate over all operands in a bundle of machine +/// instructions. /// -class MIOperands : public MachineOperandIteratorBase { +class MIBundleOperands : public MIBundleOperandIteratorBase<MachineOperand> { + /// Constructor for an iterator past the last iteration. + MIBundleOperands(MachineBasicBlock::instr_iterator InstrE, + MachineInstr::mop_iterator OpE) + : MIBundleOperandIteratorBase(InstrE, OpE) {} + public: - MIOperands(MachineInstr &MI) : MachineOperandIteratorBase(MI, false) {} - MachineOperand &operator* () const { return deref(); } - MachineOperand *operator->() const { return &deref(); } + MIBundleOperands(MachineInstr &MI) : MIBundleOperandIteratorBase(MI) {} + + /// Returns an iterator past the last iteration. + static MIBundleOperands end(const MachineBasicBlock &MBB) { + return {const_cast<MachineBasicBlock &>(MBB).instr_end(), + const_cast<MachineBasicBlock &>(MBB).instr_begin()->operands_end()}; + } }; -/// ConstMIOperands - Iterate over operands of a single const instruction. +/// ConstMIBundleOperands - Iterate over all operands in a const bundle of +/// machine instructions. /// -class ConstMIOperands : public MachineOperandIteratorBase { +class ConstMIBundleOperands + : public MIBundleOperandIteratorBase<const MachineOperand> { + + /// Constructor for an iterator past the last iteration. + ConstMIBundleOperands(MachineBasicBlock::instr_iterator InstrE, + MachineInstr::mop_iterator OpE) + : MIBundleOperandIteratorBase(InstrE, OpE) {} + public: - ConstMIOperands(const MachineInstr &MI) - : MachineOperandIteratorBase(const_cast<MachineInstr &>(MI), false) {} - const MachineOperand &operator* () const { return deref(); } - const MachineOperand *operator->() const { return &deref(); } + ConstMIBundleOperands(const MachineInstr &MI) + : MIBundleOperandIteratorBase(const_cast<MachineInstr &>(MI)) {} + + /// Returns an iterator past the last iteration. + static ConstMIBundleOperands end(const MachineBasicBlock &MBB) { + return {const_cast<MachineBasicBlock &>(MBB).instr_end(), + const_cast<MachineBasicBlock &>(MBB).instr_begin()->operands_end()}; + } }; -/// MIBundleOperands - Iterate over all operands in a bundle of machine -/// instructions. +inline iterator_range<ConstMIBundleOperands> +const_mi_bundle_ops(const MachineInstr &MI) { + return make_range(ConstMIBundleOperands(MI), + ConstMIBundleOperands::end(*MI.getParent())); +} + +inline iterator_range<MIBundleOperands> mi_bundle_ops(MachineInstr &MI) { + return make_range(MIBundleOperands(MI), + MIBundleOperands::end(*MI.getParent())); +} + +/// VirtRegInfo - Information about a virtual register used by a set of +/// operands. /// -class MIBundleOperands : public MachineOperandIteratorBase { -public: - MIBundleOperands(MachineInstr &MI) : MachineOperandIteratorBase(MI, true) {} - MachineOperand &operator* () const { return deref(); } - MachineOperand *operator->() const { return &deref(); } +struct VirtRegInfo { + /// Reads - One of the operands read the virtual register.
This does not + /// include undef or internal use operands, see MO::readsReg(). + bool Reads; + + /// Writes - One of the operands writes the virtual register. + bool Writes; + + /// Tied - Uses and defs must use the same register. This can be because of + /// a two-address constraint, or there may be a partial redefinition of a + /// sub-register. + bool Tied; }; -/// ConstMIBundleOperands - Iterate over all operands in a const bundle of -/// machine instructions. +/// AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses +/// a virtual register. This function should not be called after operator++(), +/// it expects a fresh iterator. /// -class ConstMIBundleOperands : public MachineOperandIteratorBase { -public: - ConstMIBundleOperands(const MachineInstr &MI) - : MachineOperandIteratorBase(const_cast(MI), true) {} - const MachineOperand &operator* () const { return deref(); } - const MachineOperand *operator->() const { return &deref(); } +/// @param Reg The virtual register to analyze. +/// @param Ops When set, this vector will receive an (MI, OpNum) entry for +/// each operand referring to Reg. +/// @returns A filled-in RegInfo struct. +VirtRegInfo AnalyzeVirtRegInBundle( + MachineInstr &MI, unsigned Reg, + SmallVectorImpl> *Ops = nullptr); + +/// Information about how a physical register Reg is used by a set of +/// operands. +struct PhysRegInfo { + /// There is a regmask operand indicating Reg is clobbered. + /// \see MachineOperand::CreateRegMask(). + bool Clobbered; + + /// Reg or one of its aliases is defined. The definition may only cover + /// parts of the register. + bool Defined; + /// Reg or a super-register is defined. The definition covers the full + /// register. + bool FullyDefined; + + /// Reg or one of its aliases is read. The register may only be read + /// partially. + bool Read; + /// Reg or a super-register is read. The full register is read. + bool FullyRead; + + /// Either: + /// - Reg is FullyDefined and all defs of reg or an overlapping + /// register are dead, or + /// - Reg is completely dead because "defined" by a clobber. + bool DeadDef; + + /// Reg is Defined and all defs of reg or an overlapping register are + /// dead. + bool PartialDeadDef; + + /// There is a use operand of reg or a super-register with kill flag set. + bool Killed; }; +/// AnalyzePhysRegInBundle - Analyze how the current instruction or bundle uses +/// a physical register. This function should not be called after operator++(), +/// it expects a fresh iterator. +/// +/// @param Reg The physical register to analyze. +/// @returns A filled-in PhysRegInfo struct. +PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, unsigned Reg, + const TargetRegisterInfo *TRI); + } // End llvm namespace #endif diff --git a/llvm/include/llvm/CodeGen/MachineSizeOpts.h b/llvm/include/llvm/CodeGen/MachineSizeOpts.h index 75e871d974757..3b02d0860ea15 100644 --- a/llvm/include/llvm/CodeGen/MachineSizeOpts.h +++ b/llvm/include/llvm/CodeGen/MachineSizeOpts.h @@ -23,14 +23,16 @@ class MachineBlockFrequencyInfo; class MachineFunction; /// Returns true if machine function \p MF is suggested to be size-optimized -/// base on the profile. +/// based on the profile. bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *BFI); + const MachineBlockFrequencyInfo *BFI, + PGSOQueryType QueryType = PGSOQueryType::Other); /// Returns true if machine basic block \p MBB is suggested to be size-optimized -/// base on the profile. 
+/// based on the profile. bool shouldOptimizeForSize(const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *MBFI); + const MachineBlockFrequencyInfo *MBFI, + PGSOQueryType QueryType = PGSOQueryType::Other); } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index c21414760ce9c..4e3451d80572b 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -463,6 +463,9 @@ namespace llvm { /// Create Hardware Loop pass. \see HardwareLoops.cpp FunctionPass *createHardwareLoopsPass(); + /// Create IR Type Promotion pass. \see TypePromotion.cpp + FunctionPass *createTypePromotionPass(); + } // End llvm namespace #endif diff --git a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h index 9ab9e8068eabf..ac001e326c570 100644 --- a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h +++ b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h @@ -87,17 +87,42 @@ class ReachingDefAnalysis : public MachineFunctionPass { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); + MachineFunctionProperties::Property::NoVRegs).set( + MachineFunctionProperties::Property::TracksLiveness); } /// Provides the instruction id of the closest reaching def instruction of /// PhysReg that reaches MI, relative to the beginning of MI's basic block. int getReachingDef(MachineInstr *MI, int PhysReg); + /// Provides the instruction of the closest reaching def instruction of + /// PhysReg that reaches MI, relative to the beginning of MI's basic block. + MachineInstr *getReachingMIDef(MachineInstr *MI, int PhysReg); + + /// Provides the MI, from the given block, corresponding to the Id or a + /// nullptr if the id does not refer to the block. + MachineInstr *getInstFromId(MachineBasicBlock *MBB, int InstId); + + /// Return whether A and B use the same def of PhysReg. + bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, int PhysReg); + + /// Return whether the given register is used after MI, whether it's a local + /// use or a live out. + bool isRegUsedAfter(MachineInstr *MI, int PhysReg); + /// Provides the clearance - the number of instructions since the closest /// reaching def instruction of PhysReg that reaches MI. int getClearance(MachineInstr *MI, MCPhysReg PhysReg); + /// Provides the uses, in the same block as MI, of the register that MI + /// defines. This does not consider live-outs. + void getReachingLocalUses(MachineInstr *MI, int PhysReg, + SmallVectorImpl<MachineInstr*> &Uses); + + /// Provides the number of uses, in the same block as MI, of the register + /// that MI defines. + unsigned getNumUses(MachineInstr *MI, int PhysReg); + private: /// Set up LiveRegs by merging predecessor live-out values.
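The new ReachingDefAnalysis queries compose into simple def/use reasoning. As a rough sketch of a consumer, assuming the usual MachineFunctionPass plumbing (the helper name and the rewrite it guards are hypothetical, not part of this patch):

```cpp
#include "llvm/CodeGen/ReachingDefAnalysis.h"
using namespace llvm;

// Sketch: decide whether UseB may be rewritten to read the definition that
// UseA observes. Both uses must see the same reaching def of PhysReg, the
// def must be local to the block, and PhysReg must die at or before UseB.
static bool canForwardDef(ReachingDefAnalysis &RDA, MachineInstr *UseA,
                          MachineInstr *UseB, int PhysReg) {
  if (!RDA.hasSameReachingDef(UseA, UseB, PhysReg))
    return false;
  MachineInstr *Def = RDA.getReachingMIDef(UseA, PhysReg);
  if (!Def)
    return false; // reaching def is a live-in from a predecessor block
  // Keep the transformation simple: the def feeds exactly one local use
  // and the register is not consumed later (the rewrite itself is elided).
  return RDA.getNumUses(Def, PhysReg) == 1 &&
         !RDA.isRegUsedAfter(UseB, PhysReg);
}
```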
void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index a0e37a19b37dd..3c5675395e114 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -427,7 +427,7 @@ class SelectionDAG { const TargetLibraryInfo &getLibInfo() const { return *LibInfo; } const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; } const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; } - LLVMContext *getContext() const {return Context; } + LLVMContext *getContext() const { return Context; } OptimizationRemarkEmitter &getORE() const { return *ORE; } ProfileSummaryInfo *getPSI() const { return PSI; } BlockFrequencyInfo *getBFI() const { return BFI; } @@ -1136,14 +1136,19 @@ class SelectionDAG { /// Returns sum of the base pointer and offset. SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, const SDLoc &DL); - SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, - SDValue Mask, SDValue Src0, EVT MemVT, - MachineMemOperand *MMO, ISD::LoadExtType, - bool IsExpanding = false); + SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, + SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, + MachineMemOperand *MMO, ISD::MemIndexedMode AM, + ISD::LoadExtType, bool IsExpanding = false); + SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM); SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, - SDValue Ptr, SDValue Mask, EVT MemVT, - MachineMemOperand *MMO, bool IsTruncating = false, - bool IsCompressing = false); + SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, ISD::MemIndexedMode AM, + bool IsTruncating = false, bool IsCompressing = false); + SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM); SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 3b799f967318a..e18278f8cdc61 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -553,6 +553,7 @@ BEGIN_TWO_BYTE_PACK() class LSBaseSDNodeBitfields { friend class LSBaseSDNode; + friend class MaskedLoadStoreSDNode; friend class MaskedGatherScatterSDNode; uint16_t : NumMemSDNodeBits; @@ -560,6 +561,7 @@ BEGIN_TWO_BYTE_PACK() // This storage is shared between disparate class hierarchies to hold an // enumeration specific to the class hierarchy in use. 
// LSBaseSDNode => enum ISD::MemIndexedMode + // MaskedLoadStoreSDNode => enum ISD::MemIndexedMode // MaskedGatherScatterSDNode => enum ISD::MemIndexType uint16_t AddressingMode : 3; }; @@ -2273,19 +2275,38 @@ class MaskedLoadStoreSDNode : public MemSDNode { friend class SelectionDAG; MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, - const DebugLoc &dl, SDVTList VTs, EVT MemVT, + const DebugLoc &dl, SDVTList VTs, + ISD::MemIndexedMode AM, EVT MemVT, MachineMemOperand *MMO) - : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + LSBaseSDNodeBits.AddressingMode = AM; + assert(getAddressingMode() == AM && "Value truncated"); + } - // MaskedLoadSDNode (Chain, ptr, mask, passthru) - // MaskedStoreSDNode (Chain, data, ptr, mask) + // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru) + // MaskedStoreSDNode (Chain, data, ptr, offset, mask) // Mask is a vector of i1 elements const SDValue &getBasePtr() const { return getOperand(getOpcode() == ISD::MLOAD ? 1 : 2); } - const SDValue &getMask() const { + const SDValue &getOffset() const { return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3); } + const SDValue &getMask() const { + return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4); + } + + /// Return the addressing mode for this load or store: + /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. + ISD::MemIndexedMode getAddressingMode() const { + return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode); + } + + /// Return true if this is a pre/post inc/dec load/store. + bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } + + /// Return true if this is NOT a pre/post inc/dec load/store. + bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MLOAD || @@ -2299,9 +2320,9 @@ class MaskedLoadSDNode : public MaskedLoadStoreSDNode { friend class SelectionDAG; MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - ISD::LoadExtType ETy, bool IsExpanding, EVT MemVT, - MachineMemOperand *MMO) - : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, MemVT, MMO) { + ISD::MemIndexedMode AM, ISD::LoadExtType ETy, + bool IsExpanding, EVT MemVT, MachineMemOperand *MMO) + : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) { LoadSDNodeBits.ExtTy = ETy; LoadSDNodeBits.IsExpanding = IsExpanding; } @@ -2311,8 +2332,9 @@ class MaskedLoadSDNode : public MaskedLoadStoreSDNode { } const SDValue &getBasePtr() const { return getOperand(1); } - const SDValue &getMask() const { return getOperand(2); } - const SDValue &getPassThru() const { return getOperand(3); } + const SDValue &getOffset() const { return getOperand(2); } + const SDValue &getMask() const { return getOperand(3); } + const SDValue &getPassThru() const { return getOperand(4); } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MLOAD; @@ -2327,9 +2349,9 @@ class MaskedStoreSDNode : public MaskedLoadStoreSDNode { friend class SelectionDAG; MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - bool isTrunc, bool isCompressing, EVT MemVT, - MachineMemOperand *MMO) - : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) { + ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing, + EVT MemVT, MachineMemOperand *MMO) + : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) { StoreSDNodeBits.IsTruncating = isTrunc; StoreSDNodeBits.IsCompressing = isCompressing; } @@ -2345,9 +2367,10 @@ class
MaskedStoreSDNode : public MaskedLoadStoreSDNode { /// memory at base_addr. bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; } - const SDValue &getValue() const { return getOperand(1); } + const SDValue &getValue() const { return getOperand(1); } const SDValue &getBasePtr() const { return getOperand(2); } - const SDValue &getMask() const { return getOperand(3); } + const SDValue &getOffset() const { return getOperand(3); } + const SDValue &getMask() const { return getOperand(4); } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MSTORE; diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index fa84d0efbdea9..12010d9c74af4 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1110,12 +1110,8 @@ class TargetLoweringBase { /// Return how the indexed load should be treated: either it is legal, needs /// to be promoted to a larger size, needs to be expanded to some other code /// sequence, or the target has a custom expander for it. - LegalizeAction - getIndexedLoadAction(unsigned IdxMode, MVT VT) const { - assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && - "Table isn't big enough!"); - unsigned Ty = (unsigned)VT.SimpleTy; - return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4); + LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const { + return getIndexedModeAction(IdxMode, VT, IMAB_Load); } /// Return true if the specified indexed load is legal on this target. @@ -1128,12 +1124,8 @@ class TargetLoweringBase { /// Return how the indexed store should be treated: either it is legal, needs /// to be promoted to a larger size, needs to be expanded to some other code /// sequence, or the target has a custom expander for it. - LegalizeAction - getIndexedStoreAction(unsigned IdxMode, MVT VT) const { - assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && - "Table isn't big enough!"); - unsigned Ty = (unsigned)VT.SimpleTy; - return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f); + LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const { + return getIndexedModeAction(IdxMode, VT, IMAB_Store); } /// Return true if the specified indexed store is legal on this target. @@ -1143,6 +1135,34 @@ getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); } + /// Return how the indexed masked load should be treated: either it is legal, + /// needs to be promoted to a larger size, needs to be expanded to some other + /// code sequence, or the target has a custom expander for it. + LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const { + return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad); + } + + /// Return true if the specified indexed masked load is legal on this target. + bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const { + return VT.isSimple() && + (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || + getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); + } + + /// Return how the indexed masked store should be treated: either it is legal, + /// needs to be promoted to a larger size, needs to be expanded to some other + /// code sequence, or the target has a custom expander for it. + LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const { + return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore); + } + + /// Return true if the specified indexed masked store is legal on this target.
+ bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const { + return VT.isSimple() && + (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || + getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); + } + /// Return how the condition code should be treated: either it is legal, needs /// to be expanded to some other code sequence, or the target has a custom /// expander for it. @@ -2030,13 +2050,8 @@ class TargetLoweringBase { /// /// NOTE: All indexed mode loads are initialized to Expand in /// TargetLowering.cpp - void setIndexedLoadAction(unsigned IdxMode, MVT VT, - LegalizeAction Action) { - assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && - (unsigned)Action < 0xf && "Table isn't big enough!"); - // Load action are kept in the upper half. - IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0; - IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4; + void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { + setIndexedModeAction(IdxMode, VT, IMAB_Load, Action); } /// Indicate that the specified indexed store does or does not work with the @@ -2044,13 +2059,28 @@ class TargetLoweringBase { /// /// NOTE: All indexed mode stores are initialized to Expand in /// TargetLowering.cpp - void setIndexedStoreAction(unsigned IdxMode, MVT VT, - LegalizeAction Action) { - assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && - (unsigned)Action < 0xf && "Table isn't big enough!"); - // Store action are kept in the lower half. - IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f; - IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action); + void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { + setIndexedModeAction(IdxMode, VT, IMAB_Store, Action); + } + + /// Indicate that the specified indexed masked load does or does not work with + /// the specified type and indicate what to do about it. + /// + /// NOTE: All indexed mode masked loads are initialized to Expand in + /// TargetLowering.cpp + void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, + LegalizeAction Action) { + setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action); + } + + /// Indicate that the specified indexed masked store does or does not work + /// with the specified type and indicate what to do about it. + /// + /// NOTE: All indexed mode masked stores are initialized to Expand in + /// TargetLowering.cpp + void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, + LegalizeAction Action) { + setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action); } /// Indicate that the specified condition code is or isn't supported on the @@ -2763,13 +2793,13 @@ class TargetLoweringBase { /// truncating store of a specific value type and truncating type is legal. LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; - /// For each indexed mode and each value type, keep a pair of LegalizeAction + /// For each indexed mode and each value type, keep a quad of LegalizeAction /// that indicates how instruction selection should deal with the load / - /// store. + /// store / maskedload / maskedstore. /// /// The first dimension is the value_type for the reference. The second /// dimension represents the various modes for load store. 
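To make the quad packing concrete before the table declaration just below: each 16-bit entry holds four independent 4-bit LegalizeAction nibbles at the IMAB_* shifts defined further down. A self-contained sketch of the shift/mask arithmetic (the action values here are illustrative stand-ins, not the real LegalizeAction enumerators):

```cpp
#include <cassert>
#include <cstdint>

// Stand-ins for LegalizeAction values; they only need to fit in 4 bits.
enum Action : uint16_t { Legal = 0, Promote = 1, Expand = 2, Custom = 4 };

int main() {
  uint16_t Entry = 0; // one IndexedModeActions[VT][IdxMode] slot
  auto Set = [&Entry](unsigned Shift, uint16_t A) {
    Entry &= ~(0xf << Shift); // clear the old nibble
    Entry |= A << Shift;      // install the new action
  };
  Set(/*IMAB_Store*/ 0, Expand);
  Set(/*IMAB_MaskedLoad*/ 12, Custom);
  // Reads mirror getIndexedModeAction: shift down, mask one nibble.
  assert(((Entry >> 0) & 0xf) == Expand);
  assert(((Entry >> 12) & 0xf) == Custom);
  assert(((Entry >> 4) & 0xf) == Legal); // untouched lanes remain 0
  return 0;
}
```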
- uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; + uint16_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; /// For each condition code (ISD::CondCode) keep a LegalizeAction that /// indicates how instruction selection should deal with the condition code. @@ -2812,6 +2842,32 @@ class TargetLoweringBase { /// Set default libcall names and calling conventions. void InitLibcalls(const Triple &TT); + /// The bits of IndexedModeActions used to store the legalisation actions + /// We store the data as | ML | MS | L | S | each taking 4 bits. + enum IndexedModeActionsBits { + IMAB_Store = 0, + IMAB_Load = 4, + IMAB_MaskedStore = 8, + IMAB_MaskedLoad = 12 + }; + + void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift, + LegalizeAction Action) { + assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && + (unsigned)Action < 0xf && "Table isn't big enough!"); + unsigned Ty = (unsigned)VT.SimpleTy; + IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift); + IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift; + } + + LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT, + unsigned Shift) const { + assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && + "Table isn't big enough!"); + unsigned Ty = (unsigned)VT.SimpleTy; + return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf); + } + protected: /// Return true if the extension represented by \p I is free. /// \pre \p I is a sign, zero, or fp extension and @@ -3247,9 +3303,7 @@ class TargetLowering : public TargetLoweringBase { bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } - bool isAfterLegalizeDAG() const { - return Level == AfterLegalizeDAG; - } + bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; } CombineLevel getDAGCombineLevel() { return Level; } bool isCalledByLegalizer() const { return CalledByLegalizer; } diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index 2dec107d1458d..f0896b1fc5ae5 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -75,6 +75,7 @@ class DWARFContext : public DIContext { DWARFUnitVector DWOUnits; std::unique_ptr AbbrevDWO; + std::unique_ptr MacroDWO; /// The maximum DWARF version of all units. unsigned MaxVersion = 0; @@ -271,6 +272,9 @@ class DWARFContext : public DIContext { /// Get a pointer to the parsed DebugMacro object. const DWARFDebugMacro *getDebugMacro(); + /// Get a pointer to the parsed DebugMacroDWO object. + const DWARFDebugMacro *getDebugMacroDWO(); + /// Get a reference to the parsed accelerator table object. const DWARFDebugNames &getDebugNames(); diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h index 358cacb65afd0..3b141304f85f4 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h @@ -34,6 +34,9 @@ struct DWARFLocationEntry { /// The second value of the location entry (if applicable). uint64_t Value1; + /// The index of the section this entry is relative to (if applicable). + uint64_t SectionIndex; + /// The location expression itself (if applicable). SmallVector Loc; }; @@ -60,8 +63,9 @@ class DWARFLocationTable { /// updated to point past the end of the current list). 
bool dumpLocationList(uint64_t *Offset, raw_ostream &OS, Optional BaseAddr, - const MCRegisterInfo *MRI, DWARFUnit *U, - DIDumpOptions DumpOpts, unsigned Indent) const; + const MCRegisterInfo *MRI, const DWARFObject &Obj, + DWARFUnit *U, DIDumpOptions DumpOpts, + unsigned Indent) const; Error visitAbsoluteLocationList( uint64_t Offset, Optional BaseAddr, @@ -72,7 +76,8 @@ class DWARFLocationTable { DWARFDataExtractor Data; virtual void dumpRawEntry(const DWARFLocationEntry &Entry, raw_ostream &OS, - unsigned Indent) const = 0; + unsigned Indent, DIDumpOptions DumpOpts, + const DWARFObject &Obj) const = 0; }; class DWARFDebugLoc final : public DWARFLocationTable { @@ -98,7 +103,8 @@ class DWARFDebugLoc final : public DWARFLocationTable { : DWARFLocationTable(std::move(Data)) {} /// Print the location lists found within the debug_loc section. - void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo, DIDumpOptions DumpOpts, + void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo, + const DWARFObject &Obj, DIDumpOptions DumpOpts, Optional Offset) const; Error visitLocationList( @@ -107,7 +113,8 @@ class DWARFDebugLoc final : public DWARFLocationTable { protected: void dumpRawEntry(const DWARFLocationEntry &Entry, raw_ostream &OS, - unsigned Indent) const override; + unsigned Indent, DIDumpOptions DumpOpts, + const DWARFObject &Obj) const override; }; class DWARFDebugLoclists final : public DWARFLocationTable { @@ -121,11 +128,13 @@ class DWARFDebugLoclists final : public DWARFLocationTable { /// Dump all location lists within the given range. void dumpRange(uint64_t StartOffset, uint64_t Size, raw_ostream &OS, - const MCRegisterInfo *MRI, DIDumpOptions DumpOpts); + const MCRegisterInfo *MRI, const DWARFObject &Obj, + DIDumpOptions DumpOpts); protected: void dumpRawEntry(const DWARFLocationEntry &Entry, raw_ostream &OS, - unsigned Indent) const override; + unsigned Indent, DIDumpOptions DumpOpts, + const DWARFObject &Obj) const override; private: uint16_t Version; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h index 952c41e188c7d..88e5432851d67 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h @@ -45,6 +45,12 @@ struct RangeListEntry : public DWARFListEntryBase { /// A class representing a single rangelist. class DWARFDebugRnglist : public DWARFListType { public: + /// Build a DWARFAddressRangesVector from a rangelist. + DWARFAddressRangesVector + getAbsoluteRanges(Optional BaseAddr, + function_ref(uint32_t)> + LookupPooledAddress) const; + /// Build a DWARFAddressRangesVector from a rangelist. 
DWARFAddressRangesVector getAbsoluteRanges(llvm::Optional<object::SectionedAddress> BaseAddr, diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h index 9cd34a588c564..fbcde7d7cd788 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h @@ -48,6 +48,7 @@ class DWARFObject { virtual const DWARFSection &getRangesSection() const { return Dummy; } virtual const DWARFSection &getRnglistsSection() const { return Dummy; } virtual StringRef getMacinfoSection() const { return ""; } + virtual StringRef getMacinfoDWOSection() const { return ""; } virtual const DWARFSection &getPubnamesSection() const { return Dummy; } virtual const DWARFSection &getPubtypesSection() const { return Dummy; } virtual const DWARFSection &getGnuPubnamesSection() const { return Dummy; } diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 98d7a7ee3cae1..36fdd511d1e25 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -427,14 +427,18 @@ class DWARFUnit { /// an entry in the rangelist table's offset array and is supplied by /// DW_FORM_rnglistx. Optional<uint64_t> getRnglistOffset(uint32_t Index) { - if (RngListTable) - return RngListTable->getOffsetEntry(Index); + if (!RngListTable) + return None; + if (Optional<uint64_t> Off = RngListTable->getOffsetEntry(Index)) + return *Off + RangeSectionBase; return None; } Optional<uint64_t> getLoclistOffset(uint32_t Index) { - if (LoclistTableHeader) - return LoclistTableHeader->getOffsetEntry(Index); + if (!LoclistTableHeader) + return None; + if (Optional<uint64_t> Off = LoclistTableHeader->getOffsetEntry(Index)) + return *Off + getLocSectionBase(); return None; } Expected<DWARFAddressRangesVector> collectAddressRanges(); diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h index 11599fc1797d8..8bfa5432b8112 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -16,6 +16,7 @@ #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Error.h" #include #include @@ -44,6 +45,7 @@ class LLVMSymbolizer { std::vector<std::string> DsymHints; std::string FallbackDebugPath; std::string DWPName; + std::vector<std::string> DebugFileDirectory; }; LLVMSymbolizer() = default; @@ -98,6 +100,9 @@ class LLVMSymbolizer { ObjectFile *lookUpDebuglinkObject(const std::string &Path, const ObjectFile *Obj, const std::string &ArchName); + ObjectFile *lookUpBuildIDObject(const std::string &Path, + const ELFObjectFileBase *Obj, + const std::string &ArchName); /// Returns pair of pointers to object and debug object.
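The new DebugFileDirectory option slots into the usual symbolizer setup. A minimal sketch, assuming a host ELF binary; the path, module name, and address below are made up:

```cpp
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::symbolize;

int demo() {
  LLVMSymbolizer::Options Opts;
  // Extra directory to search for debug binaries (made-up path).
  Opts.DebugFileDirectory.push_back("/usr/lib/debug");
  LLVMSymbolizer Symbolizer(Opts);

  object::SectionedAddress Addr;
  Addr.Address = 0x401000; // made-up module offset
  if (auto LineInfo = Symbolizer.symbolizeCode("./a.out", Addr))
    outs() << LineInfo->FileName << ":" << LineInfo->Line << "\n";
  else
    consumeError(LineInfo.takeError());
  return 0;
}
```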
Expected<ObjectPair> getOrCreateObjectPair(const std::string &Path, diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index aebd55563e615..7470cca498068 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -324,14 +324,14 @@ class Symbol { } static Symbol &constructExternal(void *SymStorage, Addressable &Base, - StringRef Name, JITTargetAddress Size) { + StringRef Name, JITTargetAddress Size, + Linkage L) { assert(SymStorage && "Storage cannot be null"); assert(!Base.isDefined() && "Cannot create external symbol from defined block"); assert(!Name.empty() && "External symbol name cannot be empty"); auto *Sym = reinterpret_cast<Symbol *>(SymStorage); - new (Sym) Symbol(Base, 0, Name, Size, Linkage::Strong, Scope::Default, - false, false); + new (Sym) Symbol(Base, 0, Name, Size, L, Scope::Default, false, false); return *Sym; } @@ -477,7 +477,7 @@ class Symbol { /// Set the linkage for this Symbol. void setLinkage(Linkage L) { - assert((L == Linkage::Strong || (Base->isDefined() && !Name.empty())) && + assert((L == Linkage::Strong || (!Base->isAbsolute() && !Name.empty())) && "Linkage can only be applied to defined named symbols"); this->L = static_cast<uint8_t>(L); } @@ -849,9 +849,14 @@ class LinkGraph { /// Add an external symbol. /// Some formats (e.g. ELF) allow Symbols to have sizes. For Symbols whose /// size is not known, you should substitute '0'. - Symbol &addExternalSymbol(StringRef Name, uint64_t Size) { - auto &Sym = Symbol::constructExternal( - Allocator.Allocate<Symbol>(), createAddressable(0, false), Name, Size); + /// For external symbols Linkage determines whether the symbol must be + /// present during lookup: Externals with strong linkage must be found or + /// an error will be emitted. Externals with weak linkage are permitted to + /// be undefined, in which case they are assigned a value of 0. + Symbol &addExternalSymbol(StringRef Name, uint64_t Size, Linkage L) { + auto &Sym = + Symbol::constructExternal(Allocator.Allocate<Symbol>(), + createAddressable(0, false), Name, Size, L); ExternalSymbols.insert(&Sym); return Sym; } @@ -1189,6 +1194,14 @@ struct PassConfiguration { LinkGraphPassList PostFixupPasses; }; +/// Flags for symbol lookup. +/// +/// FIXME: These basically duplicate orc::SymbolLookupFlags -- We should merge +/// the two types once we have an OrcSupport library. +enum class SymbolLookupFlags { RequiredSymbol, WeaklyReferencedSymbol }; + +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LF); + /// A map of symbol names to resolved addresses. using AsyncLookupResult = DenseMap<StringRef, JITEvaluatedSymbol>; @@ -1223,6 +1236,8 @@ createLookupContinuation(Continuation Cont) { /// Holds context for a single jitLink invocation. class JITLinkContext { public: + using LookupMap = DenseMap<StringRef, SymbolLookupFlags>; + /// Destroy a JITLinkContext. virtual ~JITLinkContext(); @@ -1240,7 +1255,7 @@ class JITLinkContext { /// Called by JITLink to resolve external symbols. This method is passed a /// lookup continuation which it must call with a result to continue the linking process.
- virtual void lookup(const DenseSet<StringRef> &Symbols, + virtual void lookup(const LookupMap &Symbols, std::unique_ptr<JITLinkAsyncLookupContinuation> LC) = 0; /// Called by JITLink once all defined symbols in the graph have been assigned diff --git a/llvm/include/llvm/ExecutionEngine/JITSymbol.h b/llvm/include/llvm/ExecutionEngine/JITSymbol.h index c0f1ca4b98760..7a2a6cfa52037 100644 --- a/llvm/include/llvm/ExecutionEngine/JITSymbol.h +++ b/llvm/include/llvm/ExecutionEngine/JITSymbol.h @@ -41,6 +41,11 @@ class SymbolRef; using JITTargetAddress = uint64_t; /// Convert a JITTargetAddress to a pointer. +/// +/// Note: This is a raw cast of the address bit pattern to the given pointer +/// type. When casting to a function pointer in order to execute JIT'd code +/// jitTargetAddressToFunction should be preferred, as it will also perform +/// pointer signing on targets that require it. template <typename T> T jitTargetAddressToPointer(JITTargetAddress Addr) { static_assert(std::is_pointer<T>::value, "T must be a pointer type"); uintptr_t IntPtr = static_cast<uintptr_t>(Addr); @@ -48,6 +53,19 @@ template <typename T> T jitTargetAddressToPointer(JITTargetAddress Addr) { return reinterpret_cast<T>(IntPtr); } +/// Convert a JITTargetAddress to a callable function pointer. +/// +/// Casts the given address to a callable function pointer. This operation +/// will perform pointer signing for platforms that require it (e.g. arm64e). +template <typename T> T jitTargetAddressToFunction(JITTargetAddress Addr) { + static_assert( + std::is_pointer<T>::value && + std::is_function<typename std::remove_pointer<T>::type>::value, + "T must be a function pointer type"); + return jitTargetAddressToPointer<T>(Addr); +} + +/// Convert a pointer to a JITTargetAddress. template <typename T> JITTargetAddress pointerToJITTargetAddress(T *Ptr) { return static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(Ptr)); } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index 4f22a4c387966..2f52edb8de257 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -45,8 +45,11 @@ using VModuleKey = uint64_t; // efficiency). using SymbolNameSet = DenseSet<SymbolStringPtr>; +/// A vector of symbol names. +using SymbolNameVector = std::vector<SymbolStringPtr>; + /// A map from symbol names (as SymbolStringPtrs) to JITSymbols -/// (address/flags pairs). +/// (address/flags pairs). using SymbolMap = DenseMap<SymbolStringPtr, JITEvaluatedSymbol>; /// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags. @@ -55,8 +58,244 @@ using SymbolFlagsMap = DenseMap<SymbolStringPtr, JITSymbolFlags>; /// A map from JITDylibs to sets of symbols. using SymbolDependenceMap = DenseMap<JITDylib *, SymbolNameSet>; -/// A list of (JITDylib*, bool) pairs. -using JITDylibSearchList = std::vector<std::pair<JITDylib *, bool>>; +/// Lookup flags that apply to each dylib in the search order for a lookup. +/// +/// If MatchExportedSymbolsOnly is used (the default) for a given dylib, then +/// only symbols in that dylib's interface will be searched. If +/// MatchAllSymbols is used then symbols with hidden visibility will match +/// as well. +enum class JITDylibLookupFlags { MatchExportedSymbolsOnly, MatchAllSymbols }; + +/// Lookup flags that apply to each symbol in a lookup. +/// +/// If RequiredSymbol is used (the default) for a given symbol then that symbol +/// must be found during the lookup or the lookup will fail returning a +/// SymbolNotFound error. If WeaklyReferencedSymbol is used and the given +/// symbol is not found then the query will continue, and no result for the +/// missing symbol will be present in the result (assuming the rest of the +/// lookup succeeds).
+enum class SymbolLookupFlags { RequiredSymbol, WeaklyReferencedSymbol }; + +/// Describes the kind of lookup being performed. The lookup kind is passed to +/// symbol generators (if they're invoked) to help them determine what +/// definitions to generate. +/// +/// Static -- Lookup is being performed as-if at static link time (e.g. +/// generators representing static archives should pull in new +/// definitions). +/// +/// DLSym -- Lookup is being performed as-if at runtime (e.g. generators +/// representing static archives should not pull in new definitions). +enum class LookupKind { Static, DLSym }; + +/// A list of (JITDylib*, JITDylibLookupFlags) pairs to be used as a search +/// order during symbol lookup. +using JITDylibSearchOrder = + std::vector<std::pair<JITDylib *, JITDylibLookupFlags>>; + +/// Convenience function for creating a search order from an ArrayRef of +/// JITDylib*, all with the same flags. +inline JITDylibSearchOrder makeJITDylibSearchOrder( + ArrayRef<JITDylib *> JDs, + JITDylibLookupFlags Flags = JITDylibLookupFlags::MatchExportedSymbolsOnly) { + JITDylibSearchOrder O; + O.reserve(JDs.size()); + for (auto *JD : JDs) + O.push_back(std::make_pair(JD, Flags)); + return O; +} + +/// A set of symbols to look up, each associated with a SymbolLookupFlags +/// value. +/// +/// This class is backed by a vector and optimized for fast insertion, +/// deletion and iteration. It does not guarantee a stable order between +/// operations, and will not automatically detect duplicate elements (they +/// can be manually checked by calling the containsDuplicates method). +class SymbolLookupSet { +public: + using value_type = std::pair<SymbolStringPtr, SymbolLookupFlags>; + using UnderlyingVector = std::vector<value_type>; + using iterator = UnderlyingVector::iterator; + using const_iterator = UnderlyingVector::const_iterator; + + SymbolLookupSet() = default; + + explicit SymbolLookupSet( + SymbolStringPtr Name, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + add(std::move(Name), Flags); + } + + /// Construct a SymbolLookupSet from an initializer list of SymbolStringPtrs. + explicit SymbolLookupSet( + std::initializer_list<SymbolStringPtr> Names, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + Symbols.reserve(Names.size()); + for (auto &Name : Names) + add(std::move(Name), Flags); + } + + /// Construct a SymbolLookupSet from a SymbolNameSet with the given + /// Flags used for each value. + explicit SymbolLookupSet( + const SymbolNameSet &Names, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + Symbols.reserve(Names.size()); + for (const auto &Name : Names) + add(Name, Flags); + } + + /// Construct a SymbolLookupSet from a vector of symbols with the given Flags + /// used for each value. + /// If the ArrayRef contains duplicates it is up to the client to remove these + /// before using this instance for lookup. + explicit SymbolLookupSet( + ArrayRef<SymbolStringPtr> Names, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + Symbols.reserve(Names.size()); + for (const auto &Name : Names) + add(Name, Flags); + } + + /// Add an element to the set. The client is responsible for checking that + /// duplicates are not added.
+ void add(SymbolStringPtr Name, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + Symbols.push_back(std::make_pair(std::move(Name), Flags)); + } + + bool empty() const { return Symbols.empty(); } + UnderlyingVector::size_type size() const { return Symbols.size(); } + iterator begin() { return Symbols.begin(); } + iterator end() { return Symbols.end(); } + const_iterator begin() const { return Symbols.begin(); } + const_iterator end() const { return Symbols.end(); } + + /// Removes the Ith element of the vector, replacing it with the last element. + void remove(UnderlyingVector::size_type I) { + std::swap(Symbols[I], Symbols.back()); + Symbols.pop_back(); + } + + /// Removes the element pointed to by the given iterator. This iterator and + /// all subsequent ones (including end()) are invalidated. + void remove(iterator I) { remove(I - begin()); } + + /// Removes all elements matching the given predicate, which must be callable + /// as bool(const SymbolStringPtr &, SymbolLookupFlags Flags). + template <typename PredFn> void remove_if(PredFn &&Pred) { + UnderlyingVector::size_type I = 0; + while (I != Symbols.size()) { + const auto &Name = Symbols[I].first; + auto Flags = Symbols[I].second; + if (Pred(Name, Flags)) + remove(I); + else + ++I; + } + } + + /// Loop over the elements of this SymbolLookupSet, applying the Body function + /// to each one. Body must be callable as + /// bool(const SymbolStringPtr &, SymbolLookupFlags). + /// If Body returns true then the element just passed in is removed from the + /// set. If Body returns false then the element is retained. + template <typename BodyFn> + auto forEachWithRemoval(BodyFn &&Body) -> typename std::enable_if< + std::is_same<decltype(std::declval<BodyFn>()( + std::declval<const SymbolStringPtr &>(), + std::declval<SymbolLookupFlags>())), + bool>::value>::type { + UnderlyingVector::size_type I = 0; + while (I != Symbols.size()) { + const auto &Name = Symbols[I].first; + auto Flags = Symbols[I].second; + if (Body(Name, Flags)) + remove(I); + else + ++I; + } + } + + /// Loop over the elements of this SymbolLookupSet, applying the Body function + /// to each one. Body must be callable as + /// Expected<bool>(const SymbolStringPtr &, SymbolLookupFlags). + /// If Body returns a failure value, the loop exits immediately. If Body + /// returns true then the element just passed in is removed from the set. If + /// Body returns false then the element is retained. + template <typename BodyFn> + auto forEachWithRemoval(BodyFn &&Body) -> typename std::enable_if< + std::is_same<decltype(std::declval<BodyFn>()( + std::declval<const SymbolStringPtr &>(), + std::declval<SymbolLookupFlags>())), + Expected<bool>>::value, + Error>::type { + UnderlyingVector::size_type I = 0; + while (I != Symbols.size()) { + const auto &Name = Symbols[I].first; + auto Flags = Symbols[I].second; + auto Remove = Body(Name, Flags); + if (!Remove) + return Remove.takeError(); + if (*Remove) + remove(I); + else + ++I; + } + return Error::success(); + } + + /// Construct a SymbolNameVector from this instance by dropping the Flags + /// values. + SymbolNameVector getSymbolNames() const { + SymbolNameVector Names; + Names.reserve(Symbols.size()); + for (auto &KV : Symbols) + Names.push_back(KV.first); + return Names; + } + + /// Sort the lookup set by pointer value. This sort is fast but sensitive to + /// allocation order and so should not be used where a consistent order is + /// required. + void sortByAddress() { + llvm::sort(Symbols, [](const value_type &LHS, const value_type &RHS) { + return LHS.first < RHS.first; + }); + } + + /// Sort the lookup set lexicographically. This sort is slow but the order + /// is unaffected by allocation order.
+ void sortByName() { + llvm::sort(Symbols, [](const value_type &LHS, const value_type &RHS) { + return *LHS.first < *RHS.first; + }); + } + + /// Remove any duplicate elements. If a SymbolLookupSet is not duplicate-free + /// by construction, this method can be used to turn it into a proper set. + void removeDuplicates() { + sortByAddress(); + auto LastI = std::unique(Symbols.begin(), Symbols.end()); + Symbols.erase(LastI, Symbols.end()); + } + +#ifndef NDEBUG + /// Returns true if this set contains any duplicates. This should only be used + /// in assertions. + bool containsDuplicates() { + if (Symbols.size() < 2) + return false; + sortByAddress(); + for (UnderlyingVector::size_type I = 1; I != Symbols.size(); ++I) + if (Symbols[I].first == Symbols[I - 1].first) + return true; + return false; + } +#endif + +private: + UnderlyingVector Symbols; +}; struct SymbolAliasMapEntry { SymbolAliasMapEntry() = default; @@ -76,6 +315,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym); /// Render a SymbolNameSet. raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols); +/// Render a SymbolNameVector. +raw_ostream &operator<<(raw_ostream &OS, const SymbolNameVector &Symbols); + /// Render a SymbolFlagsMap entry. raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap::value_type &KV); @@ -98,8 +340,25 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolDependenceMap &Deps); /// Render a MaterializationUnit. raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU); -/// Render a JITDylibSearchList. -raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs); +/// Render a JITDylibLookupFlags instance. +raw_ostream &operator<<(raw_ostream &OS, + const JITDylibLookupFlags &JDLookupFlags); + +/// Render a SymbolLookupFlags instance. +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LookupFlags); + +/// Render a LookupKind instance. +raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K); + +/// Render a SymbolLookupSet entry. +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupSet::value_type &KV); + +/// Render a SymbolLookupSet. +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupSet &LookupSet); + +/// Render a JITDylibSearchOrder. +raw_ostream &operator<<(raw_ostream &OS, + const JITDylibSearchOrder &SearchOrder); /// Render a SymbolAliasMap. raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases); @@ -107,6 +366,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases); /// Render a SymbolState. raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S); +/// Render a LookupKind. +raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K); + /// Callback to notify client that symbols have been resolved. using SymbolsResolvedCallback = unique_function<void(Expected<SymbolMap>)>; @@ -139,12 +401,13 @@ class SymbolsNotFound : public ErrorInfo<SymbolsNotFound> { static char ID; SymbolsNotFound(SymbolNameSet Symbols); + SymbolsNotFound(SymbolNameVector Symbols); std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; - const SymbolNameSet &getSymbols() const { return Symbols; } + const SymbolNameVector &getSymbols() const { return Symbols; } private: - SymbolNameSet Symbols; + SymbolNameVector Symbols; }; /// Used to notify clients that a set of symbols could not be removed.
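Taken together with the JITDylibSearchOrder and flag types above, a typical blocking lookup now reads as follows. A minimal sketch, assuming an existing ExecutionSession and JITDylib; the symbol names are made up:

```cpp
#include "llvm/ExecutionEngine/Orc/Core.h"
#include <cassert>
using namespace llvm;
using namespace llvm::orc;

Expected<SymbolMap> demoLookup(ExecutionSession &ES, JITDylib &JD) {
  SymbolLookupSet Symbols;
  Symbols.add(ES.intern("main")); // RequiredSymbol by default
  Symbols.add(ES.intern("__optional_hook"),
              SymbolLookupFlags::WeaklyReferencedSymbol);
  assert(!Symbols.containsDuplicates() && "lookup set must be duplicate-free");
  // Search JD's exported interface only; a missing __optional_hook simply
  // drops out of the result instead of failing the query.
  return ES.lookup(makeJITDylibSearchOrder({&JD}), Symbols);
}
```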
@@ -376,7 +639,8 @@ class ReExportsMaterializationUnit : public MaterializationUnit { /// Note: Care must be taken that no sets of aliases form a cycle, as such /// a cycle will result in a deadlock when any symbol in the cycle is /// resolved. - ReExportsMaterializationUnit(JITDylib *SourceJD, bool MatchNonExported, + ReExportsMaterializationUnit(JITDylib *SourceJD, + JITDylibLookupFlags SourceJDLookupFlags, SymbolAliasMap Aliases, VModuleKey K); StringRef getName() const override; @@ -387,7 +651,7 @@ class ReExportsMaterializationUnit : public MaterializationUnit { static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases); JITDylib *SourceJD = nullptr; - bool MatchNonExported = false; + JITDylibLookupFlags SourceJDLookupFlags; SymbolAliasMap Aliases; }; @@ -405,25 +669,26 @@ class ReExportsMaterializationUnit : public MaterializationUnit { inline std::unique_ptr<ReExportsMaterializationUnit> symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) { return std::make_unique<ReExportsMaterializationUnit>( - nullptr, true, std::move(Aliases), std::move(K)); + nullptr, JITDylibLookupFlags::MatchAllSymbols, std::move(Aliases), + std::move(K)); } /// Create a materialization unit for re-exporting symbols from another JITDylib /// with alternative names/flags. -/// If MatchNonExported is true then non-exported symbols from SourceJD can be -/// re-exported. If it is false, attempts to re-export a non-exported symbol -/// will result in a "symbol not found" error. +/// SourceJD will be searched using the given JITDylibLookupFlags. inline std::unique_ptr<ReExportsMaterializationUnit> reexports(JITDylib &SourceJD, SymbolAliasMap Aliases, - bool MatchNonExported = false, VModuleKey K = VModuleKey()) { + JITDylibLookupFlags SourceJDLookupFlags = + JITDylibLookupFlags::MatchExportedSymbolsOnly, + VModuleKey K = VModuleKey()) { return std::make_unique<ReExportsMaterializationUnit>( - &SourceJD, MatchNonExported, std::move(Aliases), std::move(K)); + &SourceJD, SourceJDLookupFlags, std::move(Aliases), std::move(K)); } /// Build a SymbolAliasMap for the common case where you want to re-export /// symbols from another JITDylib with the same linkage/flags. Expected<SymbolAliasMap> -buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols); +buildSimpleReexportsAAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols); /// Represents the state that a symbol has reached during materialization. enum class SymbolState : uint8_t { @@ -448,7 +713,7 @@ class AsynchronousSymbolQuery { /// Create a query for the given symbols. The NotifyComplete /// callback will be called once all queried symbols reach the given /// minimum state. - AsynchronousSymbolQuery(const SymbolNameSet &Symbols, + AsynchronousSymbolQuery(const SymbolLookupSet &Symbols, SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete); @@ -456,6 +721,15 @@ class AsynchronousSymbolQuery { void notifySymbolMetRequiredState(const SymbolStringPtr &Name, JITEvaluatedSymbol Sym); + /// Remove a symbol from the query. This is used to drop weakly referenced + /// symbols that are not found. + void dropSymbol(const SymbolStringPtr &Name) { + assert(ResolvedSymbols.count(Name) && + "Redundant removal of weakly-referenced symbol"); + ResolvedSymbols.erase(Name); + --OutstandingSymbolsCount; + } + + /// Returns true if all symbols covered by this query have been /// resolved.
bool isComplete() const { return OutstandingSymbolsCount == 0; } @@ -497,11 +771,21 @@ class JITDylib { friend class ExecutionSession; friend class MaterializationResponsibility; public: + /// Definition generators can be attached to JITDylibs to generate new + /// definitions for otherwise unresolved symbols during lookup. class DefinitionGenerator { public: virtual ~DefinitionGenerator(); - virtual Expected - tryToGenerate(JITDylib &Parent, const SymbolNameSet &Names) = 0; + + /// DefinitionGenerators should override this method to insert new + /// definitions into the parent JITDylib. K specifies the kind of this + /// lookup. JD specifies the target JITDylib being searched, and + /// JDLookupFlags specifies whether the search should match against + /// hidden symbols. Finally, Symbols describes the set of unresolved + /// symbols and their associated lookup flags. + virtual Error tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &LookupSet) = 0; }; using AsynchronousSymbolQuerySet = @@ -552,18 +836,20 @@ class JITDylib { /// as the first in the search order (instead of this dylib) ensures that /// definitions within this dylib resolve to the lazy-compiling stubs, /// rather than immediately materializing the definitions in this dylib. - void setSearchOrder(JITDylibSearchList NewSearchOrder, - bool SearchThisJITDylibFirst = true, - bool MatchNonExportedInThisDylib = true); + void setSearchOrder(JITDylibSearchOrder NewSearchOrder, + bool SearchThisJITDylibFirst = true); /// Add the given JITDylib to the search order for definitions in this /// JITDylib. - void addToSearchOrder(JITDylib &JD, bool MatcNonExported = false); + void addToSearchOrder(JITDylib &JD, + JITDylibLookupFlags JDLookupFlags = + JITDylibLookupFlags::MatchExportedSymbolsOnly); /// Replace OldJD with NewJD in the search order if OldJD is present. /// Otherwise this operation is a no-op. void replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD, - bool MatchNonExported = false); + JITDylibLookupFlags JDLookupFlags = + JITDylibLookupFlags::MatchExportedSymbolsOnly); /// Remove the given JITDylib from the search order for this JITDylib if it is /// present. Otherwise this operation is a no-op. @@ -572,7 +858,7 @@ class JITDylib { /// Do something with the search order (run under the session lock). template auto withSearchOrderDo(Func &&F) - -> decltype(F(std::declval())); + -> decltype(F(std::declval())); /// Define all symbols provided by the materialization unit to be part of this /// JITDylib. @@ -605,8 +891,11 @@ class JITDylib { Error remove(const SymbolNameSet &Names); /// Search the given JITDylib for the symbols in Symbols. If found, store - /// the flags for each symbol in Flags. Returns any unresolved symbols. - Expected lookupFlags(const SymbolNameSet &Names); + /// the flags for each symbol in Flags. If any required symbols are not found + /// then an error will be returned. + Expected lookupFlags(LookupKind K, + JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet LookupSet); /// Dump current JITDylib state to OS. 
void dump(raw_ostream &OS); @@ -709,20 +998,23 @@ class JITDylib { Error defineImpl(MaterializationUnit &MU); - Expected lookupFlagsImpl(SymbolFlagsMap &Flags, - const SymbolNameSet &Names); + void lookupFlagsImpl(SymbolFlagsMap &Result, LookupKind K, + JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &Unresolved); - Error lodgeQuery(std::shared_ptr &Q, - SymbolNameSet &Unresolved, bool MatchNonExported, - MaterializationUnitList &MUs); + Error lodgeQuery(MaterializationUnitList &MUs, + std::shared_ptr &Q, LookupKind K, + JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &Unresolved); - Error lodgeQueryImpl(std::shared_ptr &Q, - SymbolNameSet &Unresolved, bool MatchNonExported, - MaterializationUnitList &MUs); + Error lodgeQueryImpl(MaterializationUnitList &MUs, + std::shared_ptr &Q, + LookupKind K, JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &Unresolved); bool lookupImpl(std::shared_ptr &Q, std::vector> &MUs, - SymbolNameSet &Unresolved); + SymbolLookupSet &Unresolved); void detachQueryHelper(AsynchronousSymbolQuery &Q, const SymbolNameSet &QuerySymbols); @@ -754,7 +1046,7 @@ class JITDylib { UnmaterializedInfosMap UnmaterializedInfos; MaterializingInfosMap MaterializingInfos; std::vector> DefGenerators; - JITDylibSearchList SearchOrder; + JITDylibSearchOrder SearchOrder; }; /// An ExecutionSession represents a running JIT program. @@ -787,10 +1079,6 @@ class ExecutionSession { return F(); } - /// Get the "main" JITDylib, which is created automatically on construction of - /// the ExecutionSession. - JITDylib &getMainJITDylib(); - /// Return a pointer to the "name" JITDylib. /// Ownership of JITDylib remains within Execution Session JITDylib *getJITDylibByName(StringRef Name); @@ -800,8 +1088,7 @@ class ExecutionSession { /// The JITDylib Name is required to be unique. Clients should verify that /// names are not being re-used (e.g. by calling getJITDylibByName) if names /// are based on user input. - JITDylib &createJITDylib(std::string Name, - bool AddToMainDylibSearchOrder = true); + JITDylib &createJITDylib(std::string Name); /// Allocate a module key for a new module to add to the JIT. VModuleKey allocateVModule() { @@ -863,8 +1150,9 @@ class ExecutionSession { /// dependenant symbols for this query (e.g. it is being made by a top level /// client to get an address to call) then the value NoDependenciesToRegister /// can be used. - void lookup(const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols, - SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete, + void lookup(LookupKind K, const JITDylibSearchOrder &SearchOrder, + SymbolLookupSet Symbols, SymbolState RequiredState, + SymbolsResolvedCallback NotifyComplete, RegisterDependenciesFunction RegisterDependencies); /// Blocking version of lookup above. Returns the resolved symbol map. @@ -874,8 +1162,9 @@ class ExecutionSession { /// or an error occurs. If WaitUntilReady is false and an error occurs /// after resolution, the function will return a success value, but the /// error will be reported via reportErrors. - Expected lookup(const JITDylibSearchList &SearchOrder, - const SymbolNameSet &Symbols, + Expected lookup(const JITDylibSearchOrder &SearchOrder, + const SymbolLookupSet &Symbols, + LookupKind K = LookupKind::Static, SymbolState RequiredState = SymbolState::Ready, RegisterDependenciesFunction RegisterDependencies = NoDependenciesToRegister); @@ -883,7 +1172,7 @@ class ExecutionSession { /// Convenience version of blocking lookup. 
/// Searches each of the JITDylibs in the search order in turn for the given /// symbol. - Expected lookup(const JITDylibSearchList &SearchOrder, + Expected lookup(const JITDylibSearchOrder &SearchOrder, SymbolStringPtr Symbol); /// Convenience version of blocking lookup. @@ -951,7 +1240,7 @@ GeneratorT &JITDylib::addGenerator(std::unique_ptr DefGenerator) { template auto JITDylib::withSearchOrderDo(Func &&F) - -> decltype(F(std::declval())) { + -> decltype(F(std::declval())) { return ES.runSessionLocked([&]() { return F(SearchOrder); }); } @@ -997,15 +1286,17 @@ class ReexportsGenerator : public JITDylib::DefinitionGenerator { /// Create a reexports generator. If an Allow predicate is passed, only /// symbols for which the predicate returns true will be reexported. If no /// Allow predicate is passed, all symbols will be exported. - ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false, + ReexportsGenerator(JITDylib &SourceJD, + JITDylibLookupFlags SourceJDLookupFlags, SymbolPredicate Allow = SymbolPredicate()); - Expected tryToGenerate(JITDylib &JD, - const SymbolNameSet &Names) override; + Error tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &LookupSet) override; private: JITDylib &SourceJD; - bool MatchNonExported = false; + JITDylibLookupFlags SourceJDLookupFlags; SymbolPredicate Allow; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index b9bbace6f6308..c797dbbbdfd97 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -40,6 +40,17 @@ namespace orc { class ObjectLayer; +/// Run a main function, returning the result. +/// +/// If the optional ProgramName argument is given then it will be inserted +/// before the strings in Args as the first argument to the called function. +/// +/// It is legal to have an empty argument list and no program name, however +/// many main functions will expect a name argument at least, and will fail +/// if none is provided. +int runAsMain(int (*Main)(int, char *[]), ArrayRef Args, + Optional ProgramName = None); + /// This iterator provides a convenient way to iterate over the elements /// of an llvm.global_ctors/llvm.global_dtors instance. 
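A short usage sketch for `runAsMain`, where `MainAddr` stands in for an address obtained from an earlier ExecutionSession lookup and the argument strings are illustrative only:

```cpp
// jitTargetAddressToPointer is the JITSymbol.h helper for this conversion.
using MainTy = int (*)(int, char *[]);
auto *MainFn = jitTargetAddressToPointer<MainTy>(MainAddr);

// Roughly equivalent to running: jitted-prog --input in.txt
int Result = runAsMain(MainFn, {"--input", "in.txt"},
                       StringRef("jitted-prog"));
```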
/// @@ -268,8 +279,9 @@ class DynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator { return Load(nullptr, GlobalPrefix, std::move(Allow)); } - Expected tryToGenerate(JITDylib &JD, - const SymbolNameSet &Names) override; + Error tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) override; private: sys::DynamicLibrary Dylib; @@ -297,8 +309,9 @@ class StaticLibraryDefinitionGenerator : public JITDylib::DefinitionGenerator { static Expected> Create(ObjectLayer &L, std::unique_ptr ArchiveBuffer); - Expected tryToGenerate(JITDylib &JD, - const SymbolNameSet &Names) override; + Error tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) override; private: StaticLibraryDefinitionGenerator(ObjectLayer &L, @@ -307,8 +320,7 @@ class StaticLibraryDefinitionGenerator : public JITDylib::DefinitionGenerator { ObjectLayer &L; std::unique_ptr ArchiveBuffer; - object::Archive Archive; - size_t UnrealizedObjects = 0; + std::unique_ptr Archive; }; } // end namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h index bcbd72e68f154..c8c4ecdaff160 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h @@ -91,6 +91,12 @@ class JITTargetMachineBuilder { return *this; } + /// Set subtarget features. + JITTargetMachineBuilder &setFeatures(StringRef FeatureString) { + Features = SubtargetFeatures(FeatureString); + return *this; + } + /// Add subtarget features. JITTargetMachineBuilder & addFeatures(const std::vector &FeatureVec); @@ -101,6 +107,17 @@ class JITTargetMachineBuilder { /// Access subtarget features. const SubtargetFeatures &getFeatures() const { return Features; } + /// Set TargetOptions. + /// + /// Note: This operation will overwrite any previously configured options, + /// including EmulatedTLS and ExplicitEmulatedTLS which + /// the JITTargetMachineBuilder sets by default. Clients are responsible + /// for re-enabling these overwritten options. + JITTargetMachineBuilder &setOptions(TargetOptions Options) { + this->Options = std::move(Options); + return *this; + } + /// Access TargetOptions. TargetOptions &getOptions() { return Options; } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h index 766a6b070f12f..f6b86bb231678 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -100,23 +100,27 @@ class Speculator { SymbolsInJD.insert(ImplSymbolName); } - DEBUG_WITH_TYPE("orc", for (auto &I - : SpeculativeLookUpImpls) { - llvm::dbgs() << "\n In " << I.first->getName() << " JITDylib "; - for (auto &N : I.second) - llvm::dbgs() << "\n Likely Symbol : " << N; + DEBUG_WITH_TYPE("orc", { + for (auto &I : SpeculativeLookUpImpls) { + llvm::dbgs() << "\n In " << I.first->getName() << " JITDylib "; + for (auto &N : I.second) + llvm::dbgs() << "\n Likely Symbol : " << N; + } }); // for a given symbol, there may be no symbol qualified for speculatively // compile try to fix this before jumping to this code if possible. 
for (auto &LookupPair : SpeculativeLookUpImpls) - ES.lookup(JITDylibSearchList({{LookupPair.first, true}}), - LookupPair.second, SymbolState::Ready, - [this](Expected Result) { - if (auto Err = Result.takeError()) - ES.reportError(std::move(Err)); - }, - NoDependenciesToRegister); + ES.lookup( + LookupKind::Static, + makeJITDylibSearchOrder(LookupPair.first, + JITDylibLookupFlags::MatchAllSymbols), + SymbolLookupSet(LookupPair.second), SymbolState::Ready, + [this](Expected Result) { + if (auto Err = Result.takeError()) + ES.reportError(std::move(Err)); + }, + NoDependenciesToRegister); } public: @@ -151,8 +155,11 @@ class Speculator { this->getES().reportError(ReadySymbol.takeError()); }; // Include non-exported symbols also. - ES.lookup(JITDylibSearchList({{JD, true}}), SymbolNameSet({Target}), - SymbolState::Ready, OnReadyFixUp, NoDependenciesToRegister); + ES.lookup( + LookupKind::Static, + makeJITDylibSearchOrder(JD, JITDylibLookupFlags::MatchAllSymbols), + SymbolLookupSet(Target, SymbolLookupFlags::WeaklyReferencedSymbol), + SymbolState::Ready, OnReadyFixUp, NoDependenciesToRegister); } } diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index ad9a35b554144..7ea0c95612403 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -237,8 +237,10 @@ namespace llvm { /// \param File File where this type is defined. /// \param LineNo Line number. /// \param Context The surrounding context for the typedef. + /// \param AlignInBits Alignment. (optional) DIDerivedType *createTypedef(DIType *Ty, StringRef Name, DIFile *File, - unsigned LineNo, DIScope *Context); + unsigned LineNo, DIScope *Context, + uint32_t AlignInBits = 0); /// Create debugging information entry for a 'friend'. DIDerivedType *createFriend(DIType *Ty, DIType *FriendTy); diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 0e43a05b318eb..d690ccece5fa6 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -3253,6 +3253,89 @@ class DIMacroFile : public DIMacroNode { } }; +/// Identifies a unique instance of a variable. +/// +/// Storage for identifying a potentially inlined instance of a variable, +/// or a fragment thereof. This guarantees that exactly one variable instance +/// may be identified by this class, even when that variable is a fragment of +/// an aggregate variable and/or there is another inlined instance of the same +/// source code variable nearby. +/// This class does not necessarily uniquely identify that variable: it is +/// possible that a DebugVariable with different parameters may point to the +/// same variable instance, but not that one DebugVariable points to multiple +/// variable instances. +class DebugVariable { + using FragmentInfo = DIExpression::FragmentInfo; + + const DILocalVariable *Variable; + Optional Fragment; + const DILocation *InlinedAt; + + /// Fragment that will overlap all other fragments. Used as default when + /// caller demands a fragment. + static const FragmentInfo DefaultFragment; + +public: + DebugVariable(const DILocalVariable *Var, Optional FragmentInfo, + const DILocation *InlinedAt) + : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {} + + DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr, + const DILocation *InlinedAt) + : Variable(Var), + Fragment(DIExpr ? 
DIExpr->getFragmentInfo() : NoneType()), + InlinedAt(InlinedAt) {} + + const DILocalVariable *getVariable() const { return Variable; } + const Optional getFragment() const { return Fragment; } + const DILocation *getInlinedAt() const { return InlinedAt; } + + const FragmentInfo getFragmentOrDefault() const { + return Fragment.getValueOr(DefaultFragment); + } + + static bool isDefaultFragment(const FragmentInfo F) { + return F == DefaultFragment; + } + + bool operator==(const DebugVariable &Other) const { + return std::tie(Variable, Fragment, InlinedAt) == + std::tie(Other.Variable, Other.Fragment, Other.InlinedAt); + } + + bool operator<(const DebugVariable &Other) const { + return std::tie(Variable, Fragment, InlinedAt) < + std::tie(Other.Variable, Other.Fragment, Other.InlinedAt); + } +}; + +template <> struct DenseMapInfo { + using FragmentInfo = DIExpression::FragmentInfo; + + /// Empty key: no key should be generated that has no DILocalVariable. + static inline DebugVariable getEmptyKey() { + return DebugVariable(nullptr, NoneType(), nullptr); + } + + /// Difference in tombstone is that the Optional is meaningful. + static inline DebugVariable getTombstoneKey() { + return DebugVariable(nullptr, {{0, 0}}, nullptr); + } + + static unsigned getHashValue(const DebugVariable &D) { + unsigned HV = 0; + const Optional Fragment = D.getFragment(); + if (Fragment) + HV = DenseMapInfo::getHashValue(*Fragment); + + return hash_combine(D.getVariable(), HV, D.getInlinedAt()); + } + + static bool isEqual(const DebugVariable &A, const DebugVariable &B) { + return A == B; + } +}; + } // end namespace llvm #undef DEFINE_MDNODE_GET_UNPACK_IMPL diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 2d9c72108d3d4..24d39c2bc526f 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -265,7 +265,6 @@ class IRBuilderBase { void setConstrainedFPCallAttr(CallInst *I) { if (!I->hasFnAttr(Attribute::StrictFP)) I->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP); - setConstrainedFPFunctionAttr(); } //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 836911128ec46..1edce65c9ce67 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -446,6 +446,10 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { def int_aarch64_neon_fmlsl : AdvSIMD_FP16FML_Intrinsic; def int_aarch64_neon_fmlal2 : AdvSIMD_FP16FML_Intrinsic; def int_aarch64_neon_fmlsl2 : AdvSIMD_FP16FML_Intrinsic; + + // v8.3-A Floating-point complex add + def int_aarch64_neon_vcadd_rot90 : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_vcadd_rot270 : AdvSIMD_2VectorArg_Intrinsic; } let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". @@ -786,6 +790,21 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
llvm_i32_ty], [IntrNoMem]>; + class AdvSIMD_Pred2VectorArg_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>], + [IntrNoMem]>; + + class AdvSIMD_Pred3VectorArg_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>], + [IntrNoMem]>; + class AdvSIMD_SVE_Compare_Intrinsic : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, @@ -813,6 +832,20 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". llvm_anyvector_ty], [IntrNoMem]>; + class AdvSIMD_SVE_ShiftByImm_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + llvm_i32_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_ShiftWide_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + llvm_nxv2i64_ty], + [IntrNoMem]>; + class AdvSIMD_SVE_Unpack_Intrinsic : Intrinsic<[llvm_anyvector_ty], [LLVMSubdivide2VectorType<0>], @@ -849,6 +882,26 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; + class AdvSIMD_SVE_FCVT_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_FCVTZS_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMVectorOfBitcastsToInt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_INSR_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMVectorElementType<0>], + [IntrNoMem]>; + class AdvSIMD_SVE_PUNPKHI_Intrinsic : Intrinsic<[LLVMHalfElementsVectorType<0>], [llvm_anyvector_ty], @@ -861,12 +914,29 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; + class AdvSIMD_SVE_SCVTF_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], + [IntrNoMem]>; + class AdvSIMD_SVE_TSMUL_Intrinsic : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; + class AdvSIMD_SVE_CNTB_Intrinsic + : Intrinsic<[llvm_i64_ty], + [llvm_i32_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_CNTP_Intrinsic + : Intrinsic<[llvm_i64_ty], + [llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; + class AdvSIMD_SVE_DOT_Intrinsic : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -882,6 +952,42 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". llvm_i32_ty], [IntrNoMem]>; +class AdvSIMD_GatherLoad_64bitOffset_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [ + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMPointerToElt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i64_ty> + ], + [IntrReadMem, IntrArgMemOnly]>; + + class SVE2_3VectorArg_Long_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMSubdivide2VectorType<0>, + LLVMSubdivide2VectorType<0>], + [IntrNoMem]>; + + class SVE2_3VectorArgIndexed_Long_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMSubdivide2VectorType<0>, + LLVMSubdivide2VectorType<0>, + llvm_i32_ty], + [IntrNoMem]>; + + // NOTE: There is no relationship between these intrinsics beyond an attempt + // to reuse currently identical class definitions. 
+  class AdvSIMD_SVE_LOGB_Intrinsic : AdvSIMD_SVE_CNT_Intrinsic;
+
+class AdvSIMD_GatherLoad_32bitOffset_Intrinsic
+    : Intrinsic<[ llvm_anyvector_ty ],
+                [
+                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                  LLVMPointerToElt<0>, llvm_anyvector_ty
+                ],
+                [ IntrReadMem, IntrArgMemOnly ]>;
+
  // This class of intrinsics is not intended to be useful within LLVM IR but
  // is instead here to support some of the more rigid parts of the ACLE.
  class Builtin_SVCVT
@@ -893,18 +999,19 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
 // SVE

 let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
-
-
-class AdvSIMD_Pred2VectorArg_Intrinsic
+  class AdvSIMD_SVE_WHILE_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
-                [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrNoMem]>;
+                [llvm_anyint_ty, LLVMMatchType<1>],
+                [IntrNoMem]>;

-class AdvSIMD_Pred3VectorArg_Intrinsic
+class AdvSIMD_GatherLoad_VecTorBase_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
-                [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrNoMem]>;
-
+                [
+                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                  llvm_anyvector_ty,
+                  llvm_i64_ty
+                ],
+                [IntrReadMem, IntrArgMemOnly]>;
 //
 // Integer arithmetic
@@ -914,12 +1021,6 @@ def int_aarch64_sve_add : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_sub : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_subr : AdvSIMD_Pred2VectorArg_Intrinsic;
-def int_aarch64_sve_and : AdvSIMD_Pred2VectorArg_Intrinsic;
-def int_aarch64_sve_or : AdvSIMD_Pred2VectorArg_Intrinsic;
-def int_aarch64_sve_xor : AdvSIMD_Pred2VectorArg_Intrinsic;
-def int_aarch64_sve_bic : AdvSIMD_2VectorArg_Intrinsic;
-def int_aarch64_sve_bic_pred : AdvSIMD_Pred2VectorArg_Intrinsic;
-
 def int_aarch64_sve_mul : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_smulh : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_umulh : AdvSIMD_Pred2VectorArg_Intrinsic;
@@ -950,6 +1051,17 @@ def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
 def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic;
 def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+// Shifts
+
+def int_aarch64_sve_asr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_asr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
+def int_aarch64_sve_asrd : AdvSIMD_SVE_ShiftByImm_Intrinsic;
+def int_aarch64_sve_insr : AdvSIMD_SVE_INSR_Intrinsic;
+def int_aarch64_sve_lsl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_lsl_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
+def int_aarch64_sve_lsr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_lsr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
+
 //
 // Counting bits
 //
@@ -958,6 +1070,26 @@ def int_aarch64_sve_cls : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_clz : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_cnt : AdvSIMD_SVE_CNT_Intrinsic;
+//
+// Counting elements
+//
+
+def int_aarch64_sve_cntb : AdvSIMD_SVE_CNTB_Intrinsic;
+def int_aarch64_sve_cnth : AdvSIMD_SVE_CNTB_Intrinsic;
+def int_aarch64_sve_cntw : AdvSIMD_SVE_CNTB_Intrinsic;
+def int_aarch64_sve_cntd : AdvSIMD_SVE_CNTB_Intrinsic;
+
+def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic;
+
+//
+// Reversal
+//
+
+def int_aarch64_sve_rbit : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_revb : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_revh : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_revw : AdvSIMD_Merged1VectorArg_Intrinsic;
+
 //
 // Permutations
and selection // @@ -975,6 +1107,25 @@ def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic; def int_aarch64_sve_cnot : AdvSIMD_Merged1VectorArg_Intrinsic; def int_aarch64_sve_not : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_and : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_or : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_xor : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_bic_base : AdvSIMD_2VectorArg_Intrinsic; +def int_aarch64_sve_bic : AdvSIMD_Pred2VectorArg_Intrinsic; + +def int_aarch64_sve_eor : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_ands : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_bics : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_eors : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_orr : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_orn : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_nor : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_nand : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_orrs : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_orns : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_nors : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_nands : AdvSIMD_Pred2VectorArg_Intrinsic; + // // Conversion // @@ -986,6 +1137,19 @@ def int_aarch64_sve_uxtb : AdvSIMD_Merged1VectorArg_Intrinsic; def int_aarch64_sve_uxth : AdvSIMD_Merged1VectorArg_Intrinsic; def int_aarch64_sve_uxtw : AdvSIMD_Merged1VectorArg_Intrinsic; +// +// While comparisons +// + +def int_aarch64_sve_whilele : AdvSIMD_SVE_WHILE_Intrinsic; +def int_aarch64_sve_whilelo : AdvSIMD_SVE_WHILE_Intrinsic; +def int_aarch64_sve_whilels : AdvSIMD_SVE_WHILE_Intrinsic; +def int_aarch64_sve_whilelt : AdvSIMD_SVE_WHILE_Intrinsic; +def int_aarch64_sve_whilege : AdvSIMD_SVE_WHILE_Intrinsic; +def int_aarch64_sve_whilegt : AdvSIMD_SVE_WHILE_Intrinsic; +def int_aarch64_sve_whilehs : AdvSIMD_SVE_WHILE_Intrinsic; +def int_aarch64_sve_whilehi : AdvSIMD_SVE_WHILE_Intrinsic; + // // Floating-point arithmetic // @@ -1048,6 +1212,16 @@ def int_aarch64_sve_fmaxnmv : AdvSIMD_SVE_Reduce_Intrinsic; def int_aarch64_sve_fminv : AdvSIMD_SVE_Reduce_Intrinsic; def int_aarch64_sve_fminnmv : AdvSIMD_SVE_Reduce_Intrinsic; +// +// Floating-point conversions +// + +def int_aarch64_sve_fcvt : AdvSIMD_SVE_FCVT_Intrinsic; +def int_aarch64_sve_fcvtzs : AdvSIMD_SVE_FCVTZS_Intrinsic; +def int_aarch64_sve_fcvtzu : AdvSIMD_SVE_FCVTZS_Intrinsic; +def int_aarch64_sve_scvtf : AdvSIMD_SVE_SCVTF_Intrinsic; +def int_aarch64_sve_ucvtf : AdvSIMD_SVE_SCVTF_Intrinsic; + // // Floating-point comparisons // @@ -1061,7 +1235,41 @@ def int_aarch64_sve_fcmpgt : AdvSIMD_SVE_Compare_Intrinsic; def int_aarch64_sve_fcmpne : AdvSIMD_SVE_Compare_Intrinsic; def int_aarch64_sve_fcmpuo : AdvSIMD_SVE_Compare_Intrinsic; -def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<"svcvt_s32_f64_m", llvm_nxv4i32_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<"svcvt_s64_f16_m", llvm_nxv2i64_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<"svcvt_s64_f32_m", llvm_nxv2i64_ty, llvm_nxv4f32_ty>; + +def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<"svcvt_u32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<"svcvt_u32_f64_m", llvm_nxv4i32_ty, 
llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<"svcvt_u64_f16_m", llvm_nxv2i64_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzu_i64f32 : Builtin_SVCVT<"svcvt_u64_f32_m", llvm_nxv2i64_ty, llvm_nxv4f32_ty>; + +def int_aarch64_sve_fcvt_f16f32 : Builtin_SVCVT<"svcvt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvt_f16f64 : Builtin_SVCVT<"svcvt_f16_f64_m", llvm_nxv8f16_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvt_f32f64 : Builtin_SVCVT<"svcvt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; + +def int_aarch64_sve_fcvt_f32f16 : Builtin_SVCVT<"svcvt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvt_f64f16 : Builtin_SVCVT<"svcvt_f64_f16_m", llvm_nxv2f64_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<"svcvt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv4f32_ty>; + +def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<"svcvtlt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<"svcvtlt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<"svcvtnt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<"svcvtnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; + +def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<"svcvtx_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<"svcvtxnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; + +def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_s32_m", llvm_nxv8f16_ty, llvm_nxv4i32_ty>; +def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_s64_m", llvm_nxv8f16_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_scvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_s64_m", llvm_nxv4f32_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_scvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_s32_m", llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_ucvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_u32_m", llvm_nxv8f16_ty, llvm_nxv4i32_ty>; +def int_aarch64_sve_ucvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_u64_m", llvm_nxv8f16_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_ucvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_u64_m", llvm_nxv4f32_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_ucvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_u32_m", llvm_nxv2f64_ty, llvm_nxv4i32_ty>; // // Predicate operations @@ -1069,4 +1277,56 @@ def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i3 def int_aarch64_sve_punpkhi : AdvSIMD_SVE_PUNPKHI_Intrinsic; def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic; + +// +// Gather loads: +// + +// scalar + vector, 64 bit unscaled offsets +def int_aarch64_sve_ld1_gather : AdvSIMD_GatherLoad_64bitOffset_Intrinsic; + +// scalar + vector, 64 bit scaled offsets +def int_aarch64_sve_ld1_gather_index : AdvSIMD_GatherLoad_64bitOffset_Intrinsic; + +// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (zxtw) +// extended to 64 bits +def int_aarch64_sve_ld1_gather_sxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; +def int_aarch64_sve_ld1_gather_uxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; + +// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended +// to 64 bits +def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; +def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; + +// vector base + immediate index +def int_aarch64_sve_ld1_gather_imm : AdvSIMD_GatherLoad_VecTorBase_Intrinsic; + +// +// SVE2 - Non-widening pairwise 
arithmetic +// + +def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_fminp : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_fminnmp : AdvSIMD_Pred2VectorArg_Intrinsic; + +// +// SVE2 - Floating-point widening multiply-accumulate +// + +def int_aarch64_sve_fmlalb : SVE2_3VectorArg_Long_Intrinsic; +def int_aarch64_sve_fmlalb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; +def int_aarch64_sve_fmlalt : SVE2_3VectorArg_Long_Intrinsic; +def int_aarch64_sve_fmlalt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; +def int_aarch64_sve_fmlslb : SVE2_3VectorArg_Long_Intrinsic; +def int_aarch64_sve_fmlslb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; +def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic; +def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; + +// +// SVE2 - Floating-point integer binary logarithm +// + +def int_aarch64_sve_flogb : AdvSIMD_SVE_LOGB_Intrinsic; } diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 10417411edca2..c4061ea01eeec 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -426,8 +426,6 @@ let IntrProperties = [IntrNoMem, Commutative] in { def int_arm_neon_vhaddu : Neon_2Arg_Intrinsic; def int_arm_neon_vrhadds : Neon_2Arg_Intrinsic; def int_arm_neon_vrhaddu : Neon_2Arg_Intrinsic; - def int_arm_neon_vqadds : Neon_2Arg_Intrinsic; - def int_arm_neon_vqaddu : Neon_2Arg_Intrinsic; def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic; // Vector Multiply. @@ -459,8 +457,6 @@ let IntrProperties = [IntrNoMem, Commutative] in { // Vector Subtract. def int_arm_neon_vhsubs : Neon_2Arg_Intrinsic; def int_arm_neon_vhsubu : Neon_2Arg_Intrinsic; -def int_arm_neon_vqsubs : Neon_2Arg_Intrinsic; -def int_arm_neon_vqsubu : Neon_2Arg_Intrinsic; def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic; // Vector Absolute Compare. 
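As a usage sketch for the SVE intrinsic declarations in the hunks above: from C++ they are reached through the generated `Intrinsic::` enum, e.g. `int_aarch64_sve_cntb` becomes `Intrinsic::aarch64_sve_cntb`. Here `M` and `Builder` are assumed to be an in-scope `Module` and `IRBuilder<>`, and 31 is assumed to be the ACLE "all elements" pattern immediate:

```cpp
// llvm.aarch64.sve.cntb has no overloaded types, so no type list is needed.
Function *CntB = Intrinsic::getDeclaration(&M, Intrinsic::aarch64_sve_cntb);
Value *BytesPerVec = Builder.CreateCall(CntB, {Builder.getInt32(31)});
```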
@@ -777,10 +773,15 @@ class Neon_Dot_Intrinsic def int_arm_neon_udot : Neon_Dot_Intrinsic; def int_arm_neon_sdot : Neon_Dot_Intrinsic; -def int_arm_vctp8 : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>; -def int_arm_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>; -def int_arm_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>; -def int_arm_vctp64 : Intrinsic<[llvm_v2i1_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_mve_vctp8 : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_mve_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_mve_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>; +// vctp64 takes v4i1, to work around v2i1 not being a legal MVE type +def int_arm_mve_vctp64 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>; + +// v8.3-A Floating-point complex add +def int_arm_neon_vcadd_rot90 : Neon_2Arg_Intrinsic; +def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic; // GNU eabi mcount def int_arm_gnu_eabi_mcount : Intrinsic<[], @@ -800,12 +801,45 @@ multiclass IntrinsicSignSuffix rets, list params = [], def _u: Intrinsic; } +def int_arm_mve_min_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_max_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_abd_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; def int_arm_mve_add_predicated: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; +def int_arm_mve_and_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_bic_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_eor_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_orn_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_orr_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; def int_arm_mve_sub_predicated: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; +def int_arm_mve_mul_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_mulh_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_rmulh_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; defm int_arm_mve_minv: IntrinsicSignSuffix<[llvm_i32_ty], [llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>; @@ -870,6 +904,9 @@ defm int_arm_mve_sqrshr: ARM_MVE_qrshift<[llvm_i32_ty]>; def int_arm_mve_lsll: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>; def int_arm_mve_asrl: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>; +def int_arm_mve_vabd: Intrinsic< + 
[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_arm_mve_vadc: Intrinsic< [llvm_anyvector_ty, llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; @@ -877,6 +914,12 @@ def int_arm_mve_vadc_predicated: Intrinsic< [llvm_anyvector_ty, llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>; +def int_arm_mve_vmulh: Intrinsic< + [llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; +def int_arm_mve_vrmulh: Intrinsic< + [llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_arm_mve_vld2q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem]>; def int_arm_mve_vld4q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem]>; diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 5d4ce4955b996..6621fc9f819cd 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -366,7 +366,7 @@ inline api_pred_ty m_Negative(const APInt *&V) { struct is_nonnegative { bool isValue(const APInt &C) { return C.isNonNegative(); } }; -/// Match an integer or vector of nonnegative values. +/// Match an integer or vector of non-negative values. /// For vectors, this includes constants with undefined elements. inline cst_pred_ty m_NonNegative() { return cst_pred_ty(); @@ -375,6 +375,28 @@ inline api_pred_ty m_NonNegative(const APInt *&V) { return V; } +struct is_strictlypositive { + bool isValue(const APInt &C) { return C.isStrictlyPositive(); } +}; +/// Match an integer or vector of strictly positive values. +/// For vectors, this includes constants with undefined elements. +inline cst_pred_ty m_StrictlyPositive() { + return cst_pred_ty(); +} +inline api_pred_ty m_StrictlyPositive(const APInt *&V) { + return V; +} + +struct is_nonpositive { + bool isValue(const APInt &C) { return C.isNonPositive(); } +}; +/// Match an integer or vector of non-positive values. +/// For vectors, this includes constants with undefined elements. +inline cst_pred_ty m_NonPositive() { + return cst_pred_ty(); +} +inline api_pred_ty m_NonPositive(const APInt *&V) { return V; } + struct is_one { bool isValue(const APInt &C) { return C.isOneValue(); } }; @@ -1736,6 +1758,12 @@ struct m_Intrinsic_Ty { Argument_match>; }; +template +struct m_Intrinsic_Ty { + using Ty = match_combine_and::Ty, + Argument_match>; +}; + /// Match intrinsic calls like this: /// m_Intrinsic(m_Value(X)) template inline IntrinsicID_match m_Intrinsic() { @@ -1766,6 +1794,15 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) { return m_CombineAnd(m_Intrinsic(Op0, Op1, Op2), m_Argument<3>(Op3)); } +template +inline typename m_Intrinsic_Ty::Ty +m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3, + const T4 &Op4) { + return m_CombineAnd(m_Intrinsic(Op0, Op1, Op2, Op3), + m_Argument<4>(Op4)); +} + // Helper intrinsic matching specializations. 
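The new matchers above slot into the usual `match()` idiom. A small sketch, assuming `V` is a `Value *` under combine and `using namespace llvm::PatternMatch` is in effect:

```cpp
const APInt *C;
if (match(V, m_NonPositive(C))) {
  // V is a constant (or splat, possibly with undef lanes) <= 0; *C is bound.
}
if (match(V, m_StrictlyPositive())) {
  // V is a constant (or splat) > 0.
}
```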
template inline typename m_Intrinsic_Ty::Ty m_BitReverse(const Opnd0 &Op0) { diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index dbae32e843936..574cb69360ac0 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -408,6 +408,7 @@ void initializeTargetTransformInfoWrapperPassPass(PassRegistry&); void initializeThreadSanitizerLegacyPassPass(PassRegistry&); void initializeTwoAddressInstructionPassPass(PassRegistry&); void initializeTypeBasedAAWrapperPassPass(PassRegistry&); +void initializeTypePromotionPass(PassRegistry&); void initializeUnifyFunctionExitNodesPass(PassRegistry&); void initializeUnpackMachineBundlesPass(PassRegistry&); void initializeUnreachableBlockElimLegacyPassPass(PassRegistry&); diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index 589f1dfe90b81..5a6dff64caef7 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -333,6 +333,10 @@ class MCAsmInfo { /// protected visibility. Defaults to MCSA_Protected MCSymbolAttr ProtectedVisibilityAttr = MCSA_Protected; + // This attribute is used to indicate symbols such as commons on AIX may have + // a storage mapping class embedded in the name. + bool SymbolsHaveSMC = false; + //===--- Dwarf Emission Directives -----------------------------------===// /// True if target supports emission of debugging information. Defaults to @@ -587,6 +591,8 @@ class MCAsmInfo { return ProtectedVisibilityAttr; } + bool getSymbolsHaveSMC() const { return SymbolsHaveSMC; } + bool doesSupportDebugInformation() const { return SupportsDebugInformation; } bool doesSupportExceptionHandling() const { diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 12d681ffbebc9..2f7f5d64b466d 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -111,6 +111,7 @@ class MCObjectFileInfo { MCSection *DwarfLineDWOSection = nullptr; MCSection *DwarfLocDWOSection = nullptr; MCSection *DwarfStrOffDWOSection = nullptr; + MCSection *DwarfMacinfoDWOSection = nullptr; /// The DWARF v5 string offset and address table sections. MCSection *DwarfStrOffSection = nullptr; @@ -303,6 +304,9 @@ class MCObjectFileInfo { MCSection *getDwarfLoclistsDWOSection() const { return DwarfLoclistsDWOSection; } + MCSection *getDwarfMacinfoDWOSection() const { + return DwarfMacinfoDWOSection; + } MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; } MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; } MCSection *getDwarfSwiftASTSection() const { return DwarfSwiftASTSection; } diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h index c7dc56ea588e9..9280dc75e50bc 100644 --- a/llvm/include/llvm/MC/MCRegisterInfo.h +++ b/llvm/include/llvm/MC/MCRegisterInfo.h @@ -16,11 +16,13 @@ #define LLVM_MC_MCREGISTERINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegister.h" #include #include +#include #include namespace llvm { @@ -177,6 +179,9 @@ class MCRegisterInfo { DenseMap L2CVRegs; // LLVM to CV regs mapping public: + // Forward declaration to become a friend class of DiffListIterator. + template class mc_difflist_iterator; + /// DiffListIterator - Base iterator class that can traverse the /// differentially encoded register and regunit lists in DiffLists. 
/// Don't use this class directly, use one of the specialized sub-classes @@ -220,8 +225,105 @@ class MCRegisterInfo { if (!advance()) List = nullptr; } + + template friend class MCRegisterInfo::mc_difflist_iterator; }; + /// Forward iterator using DiffListIterator. + template + class mc_difflist_iterator + : public iterator_facade_base, + std::forward_iterator_tag, MCPhysReg> { + MCRegisterInfo::DiffListIterator Iter; + /// Current value as MCPhysReg, so we can return a reference to it. + MCPhysReg Val; + + protected: + mc_difflist_iterator(MCRegisterInfo::DiffListIterator Iter) : Iter(Iter) {} + + // Allow conversion between instantiations where valid. + mc_difflist_iterator(MCRegister Reg, const MCPhysReg *DiffList) { + Iter.init(Reg, DiffList); + Val = *Iter; + } + + public: + // Allow default construction to build variables, but this doesn't build + // a useful iterator. + mc_difflist_iterator() = default; + + /// Return an iterator past the last element. + static SubT end() { + SubT End; + End.Iter.List = nullptr; + return End; + } + + bool operator==(const mc_difflist_iterator &Arg) const { + return Iter.List == Arg.Iter.List; + } + + const MCPhysReg &operator*() const { return Val; } + + using mc_difflist_iterator::iterator_facade_base::operator++; + void operator++() { + assert(Iter.List && "Cannot increment the end iterator!"); + ++Iter; + Val = *Iter; + } + }; + + /// Forward iterator over all sub-registers. + /// TODO: Replace remaining uses of MCSubRegIterator. + class mc_subreg_iterator : public mc_difflist_iterator { + public: + mc_subreg_iterator(MCRegisterInfo::DiffListIterator Iter) + : mc_difflist_iterator(Iter) {} + mc_subreg_iterator() = default; + mc_subreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI) + : mc_difflist_iterator(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs) {} + }; + + /// Forward iterator over all super-registers. + /// TODO: Replace remaining uses of MCSuperRegIterator. + class mc_superreg_iterator + : public mc_difflist_iterator { + public: + mc_superreg_iterator(MCRegisterInfo::DiffListIterator Iter) + : mc_difflist_iterator(Iter) {} + mc_superreg_iterator() = default; + mc_superreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI) + : mc_difflist_iterator(Reg, + MCRI->DiffLists + MCRI->get(Reg).SuperRegs) {} + }; + + /// Return an iterator range over all sub-registers of \p Reg, excluding \p + /// Reg. + iterator_range subregs(MCRegister Reg) const { + return make_range(std::next(mc_subreg_iterator(Reg, this)), + mc_subreg_iterator::end()); + } + + /// Return an iterator range over all sub-registers of \p Reg, including \p + /// Reg. + iterator_range subregs_inclusive(MCRegister Reg) const { + return make_range({Reg, this}, mc_subreg_iterator::end()); + } + + /// Return an iterator range over all super-registers of \p Reg, excluding \p + /// Reg. + iterator_range superregs(MCRegister Reg) const { + return make_range(std::next(mc_superreg_iterator(Reg, this)), + mc_superreg_iterator::end()); + } + + /// Return an iterator range over all super-registers of \p Reg, including \p + /// Reg. + iterator_range + superregs_inclusive(MCRegister Reg) const { + return make_range({Reg, this}, mc_superreg_iterator::end()); + } + // These iterators are allowed to sub-class DiffListIterator and access // internal list pointers. 
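A usage sketch for the new range-based sub/super-register API, assuming `MCRI` is a populated `MCRegisterInfo` and `Reg` a valid register:

```cpp
// Excludes Reg itself; use subregs_inclusive(Reg) to include it.
for (MCPhysReg Sub : MCRI.subregs(Reg))
  dbgs() << "sub-register: " << MCRI.getName(Sub) << "\n";

// Includes Reg itself as the first element.
for (MCPhysReg Super : MCRI.superregs_inclusive(Reg))
  dbgs() << "super-register: " << MCRI.getName(Super) << "\n";
```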
friend class MCSubRegIterator; diff --git a/llvm/include/llvm/MC/MCSymbolXCOFF.h b/llvm/include/llvm/MC/MCSymbolXCOFF.h index 8bc7817404392..07dfb5d299776 100644 --- a/llvm/include/llvm/MC/MCSymbolXCOFF.h +++ b/llvm/include/llvm/MC/MCSymbolXCOFF.h @@ -9,6 +9,7 @@ #define LLVM_MC_MCSYMBOLXCOFF_H #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCSymbol.h" @@ -50,6 +51,17 @@ class MCSymbolXCOFF : public MCSymbol { bool hasContainingCsect() const { return ContainingCsect != nullptr; } + StringRef getUnqualifiedName() const { + const StringRef name = getName(); + if (name.back() == ']') { + StringRef lhs, rhs; + std::tie(lhs, rhs) = name.rsplit('['); + assert(!rhs.empty() && "Invalid SMC format in XCOFF symbol."); + return lhs; + } + return name; + } + private: Optional StorageClass; MCSectionXCOFF *ContainingCsect = nullptr; diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index dc848cee7e574..42c5b67ac3fa8 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -402,12 +402,17 @@ ELFFile::getSectionContentsAsArray(const Elf_Shdr *Sec) const { " has an invalid sh_size (" + Twine(Size) + ") which is not a multiple of its sh_entsize (" + Twine(Sec->sh_entsize) + ")"); - if ((std::numeric_limits::max() - Offset < Size) || - Offset + Size > Buf.size()) + if (std::numeric_limits::max() - Offset < Size) return createError("section " + getSecIndexForError(this, Sec) + " has a sh_offset (0x" + Twine::utohexstr(Offset) + ") + sh_size (0x" + Twine::utohexstr(Size) + ") that cannot be represented"); + if (Offset + Size > Buf.size()) + return createError("section " + getSecIndexForError(this, Sec) + + " has a sh_offset (0x" + Twine::utohexstr(Offset) + + ") + sh_size (0x" + Twine::utohexstr(Size) + + ") that is greater than the file size (0x" + + Twine::utohexstr(Buf.size()) + ")"); if (Offset % alignof(T)) // TODO: this error is untested. diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index a498621a2a13f..7e0244ed08e7d 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -324,7 +324,8 @@ struct VerneedEntry { }; struct VerneedSection : Section { - std::vector VerneedV; + Optional Content; + Optional> VerneedV; llvm::yaml::Hex64 Info; VerneedSection() : Section(ChunkKind::Verneed) {} @@ -397,7 +398,9 @@ struct VerdefEntry { }; struct VerdefSection : Section { - std::vector Entries; + Optional> Entries; + Optional Content; + llvm::yaml::Hex64 Info; VerdefSection() : Section(ChunkKind::Verdef) {} @@ -478,7 +481,7 @@ struct Object { // top-level key, which automatically ensures that invariants like there // being a single SHT_SYMTAB section are upheld. Optional> Symbols; - std::vector DynamicSymbols; + Optional> DynamicSymbols; std::vector
<Section *> getSections() { std::vector<Section *>
Ret; diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 63784463e1718..6c0bb6c2fc3ad 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -20,6 +20,8 @@ #define LLVM_SUPPORT_COMMANDLINE_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -29,6 +31,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -1831,7 +1834,7 @@ void PrintHelpMessage(bool Hidden = false, bool Categorized = false); // /// Use this to get a StringMap to all registered named options -/// (e.g. -help). Note \p Map Should be an empty StringMap. +/// (e.g. -help). /// /// \return A reference to the StringMap used by the cl APIs to parse options. /// @@ -1964,10 +1967,16 @@ bool readConfigFile(StringRef CfgFileName, StringSaver &Saver, /// with nullptrs in the Argv vector. /// \param [in] RelativeNames true if names of nested response files must be /// resolved relative to including file. +/// \param [in] FS File system used for all file access when running the tool. +/// \param [in] CurrentDir Path used to resolve relative rsp files. If set to +/// None, process' cwd is used instead. /// \return true if all @files were expanded successfully or there were none. -bool ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, - SmallVectorImpl &Argv, - bool MarkEOLs = false, bool RelativeNames = false); +bool ExpandResponseFiles( + StringSaver &Saver, TokenizerCallback Tokenizer, + SmallVectorImpl &Argv, bool MarkEOLs = false, + bool RelativeNames = false, + llvm::vfs::FileSystem &FS = *llvm::vfs::getRealFileSystem(), + llvm::Optional CurrentDir = llvm::None); /// Mark all options not part of this category as cl::ReallyHidden. /// diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index 07fd94e29a1fb..df0b02c1335d4 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -107,6 +107,18 @@ struct KnownBits { Zero.setSignBit(); } + /// Return the minimal value possible given these KnownBits. + APInt getMinValue() const { + // Assume that all bits that aren't known-ones are zeros. + return One; + } + + /// Return the maximal value possible given these KnownBits. + APInt getMaxValue() const { + // Assume that all bits that aren't known-zeros are ones. + return ~Zero; + } + /// Truncate the underlying known Zero and One bits. This is equivalent /// to truncating the value we're tracking. 
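A worked sketch of the new min/max helpers above: for a 4-bit value known to look like `0b??10` (bit 1 known one, bit 0 known zero), the representable range is [2, 14]:

```cpp
KnownBits Known(4);
Known.One.setBit(1);             // bit 1 is known to be one
Known.Zero.setBit(0);            // bit 0 is known to be zero
APInt Min = Known.getMinValue(); // unknown bits as zeros: 0b0010 == 2
APInt Max = Known.getMaxValue(); // unknown bits as ones:  0b1110 == 14
```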
KnownBits trunc(unsigned BitWidth) const { diff --git a/llvm/include/llvm/Support/LowLevelTypeImpl.h b/llvm/include/llvm/Support/LowLevelTypeImpl.h index 0e02b6e7d750a..6ef7c298bc28f 100644 --- a/llvm/include/llvm/Support/LowLevelTypeImpl.h +++ b/llvm/include/llvm/Support/LowLevelTypeImpl.h @@ -137,6 +137,8 @@ class LLT { : LLT::scalar(NewEltSize); } + bool isByteSized() const { return (getSizeInBits() & 7) == 0; } + unsigned getScalarSizeInBits() const { assert(RawData != 0 && "Invalid Type"); if (!IsVector) { diff --git a/llvm/include/llvm/Support/Path.h b/llvm/include/llvm/Support/Path.h index 488f17427fd7f..97955f882d51e 100644 --- a/llvm/include/llvm/Support/Path.h +++ b/llvm/include/llvm/Support/Path.h @@ -152,18 +152,33 @@ void replace_extension(SmallVectorImpl &path, const Twine &extension, /// /// @code /// /foo, /old, /new => /foo +/// /old, /old, /new => /new +/// /old, /old/, /new, false => /old +/// /old, /old/, /new, true => /new /// /old/foo, /old, /new => /new/foo +/// /old/foo, /old/, /new => /new/foo +/// /old/foo, /old/, /new/ => /new/foo +/// /oldfoo, /old, /new => /oldfoo /// /foo, , /new => /new/foo -/// /old/foo, /old, => /foo +/// /foo, , new => new/foo +/// /old/foo, /old, , false => /foo +/// /old/foo, /old, , true => foo /// @endcode /// /// @param Path If \a Path starts with \a OldPrefix modify to instead /// start with \a NewPrefix. -/// @param OldPrefix The path prefix to strip from \a Path. +/// @param OldPrefix The path prefix to strip from \a Path. Any trailing +/// path separator is ignored if strict is true. /// @param NewPrefix The path prefix to replace \a NewPrefix with. -void replace_path_prefix(SmallVectorImpl &Path, +/// @param style The path separator style +/// @param strict If strict is true, a directory separator following +/// \a OldPrefix will also be stripped. Otherwise, directory +/// separators will only be matched and stripped when present +/// in \a OldPrefix. +/// @result true if \a Path begins with OldPrefix +bool replace_path_prefix(SmallVectorImpl &Path, const StringRef &OldPrefix, const StringRef &NewPrefix, - Style style = Style::native); + Style style = Style::native, bool strict = false); /// Append to path. /// diff --git a/llvm/include/llvm/Support/TimeProfiler.h b/llvm/include/llvm/Support/TimeProfiler.h index 8cc430d0bc727..2b51bba0e7f86 100644 --- a/llvm/include/llvm/Support/TimeProfiler.h +++ b/llvm/include/llvm/Support/TimeProfiler.h @@ -19,7 +19,8 @@ extern TimeTraceProfiler *TimeTraceProfilerInstance; /// Initialize the time trace profiler. /// This sets up the global \p TimeTraceProfilerInstance /// variable to be the profiler instance. -void timeTraceProfilerInitialize(unsigned TimeTraceGranularity); +void timeTraceProfilerInitialize(unsigned TimeTraceGranularity, + StringRef ProcName); /// Cleanup the time trace profiler, if it was initialized. 
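Two rows of the `replace_path_prefix` table above, expressed in code (a `SmallString` converts to the `SmallVectorImpl<char>` the API expects):

```cpp
SmallString<64> P("/old/foo");
bool Matched = sys::path::replace_path_prefix(P, "/old", "/new");
// Matched == true, P == "/new/foo"

SmallString<64> Q("/foo");
sys::path::replace_path_prefix(Q, "", "/new");
// An empty OldPrefix always matches: Q == "/new/foo"
```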
void timeTraceProfilerCleanup(); diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index c395e5bcecf17..d1db4eceabb88 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -107,7 +107,7 @@ namespace llvm { public: TargetOptions() : PrintMachineCode(false), UnsafeFPMath(false), NoInfsFPMath(false), - NoNaNsFPMath(false), NoTrappingFPMath(false), + NoNaNsFPMath(false), NoTrappingFPMath(true), NoSignedZerosFPMath(false), HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 441f3d7d118d1..9543086c4da72 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -224,13 +224,13 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3> ]>; -def SDTMaskedStore: SDTypeProfile<0, 3, [ // masked store - SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2> +def SDTMaskedStore: SDTypeProfile<0, 4, [ // masked store + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisSameNumEltsAs<0, 3> ]>; -def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load - SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>, - SDTCisSameNumEltsAs<0, 2> +def SDTMaskedLoad: SDTypeProfile<1, 4, [ // masked load + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisSameAs<0, 4>, + SDTCisSameNumEltsAs<0, 3> ]>; def SDTVecShuffle : SDTypeProfile<1, 2, [ diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index a75a047b7fd0d..4f6f823a230b7 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1820,6 +1820,42 @@ struct DerefState : AbstractState { /// State representing for dereferenceable bytes. IncIntegerState<> DerefBytesState; + /// Map representing for accessed memory offsets and sizes. + /// A key is Offset and a value is size. + /// If there is a load/store instruction something like, + /// p[offset] = v; + /// (offset, sizeof(v)) will be inserted to this map. + /// std::map is used because we want to iterate keys in ascending order. + std::map AccessedBytesMap; + + /// Helper function to calculate dereferenceable bytes from current known + /// bytes and accessed bytes. + /// + /// int f(int *A){ + /// *A = 0; + /// *(A+2) = 2; + /// *(A+1) = 1; + /// *(A+10) = 10; + /// } + /// ``` + /// In that case, AccessedBytesMap is `{0:4, 4:4, 8:4, 40:4}`. + /// AccessedBytesMap is std::map so it is iterated in accending order on + /// key(Offset). So KnownBytes will be updated like this: |Access | KnownBytes + /// |(0, 4)| 0 -> 4 + /// |(4, 4)| 4 -> 8 + /// |(8, 4)| 8 -> 12 + /// |(40, 4) | 12 (break) + void computeKnownDerefBytesFromAccessedMap() { + int64_t KnownBytes = DerefBytesState.getKnown(); + for (auto &Access : AccessedBytesMap) { + if (KnownBytes < Access.first) + break; + KnownBytes = std::max(KnownBytes, Access.first + (int64_t)Access.second); + } + + DerefBytesState.takeKnownMaximum(KnownBytes); + } + /// State representing that whether the value is globaly dereferenceable. BooleanState GlobalState; @@ -1849,6 +1885,9 @@ struct DerefState : AbstractState { /// Update known dereferenceable bytes. 
void takeKnownDerefBytesMaximum(uint64_t Bytes) { DerefBytesState.takeKnownMaximum(Bytes); + + // Known bytes might increase. + computeKnownDerefBytesFromAccessedMap(); } /// Update assumed dereferenceable bytes. @@ -1856,6 +1895,14 @@ struct DerefState : AbstractState { DerefBytesState.takeAssumedMinimum(Bytes); } + /// Add accessed bytes to the map. + void addAccessedBytes(int64_t Offset, uint64_t Size) { + AccessedBytesMap[Offset] = std::max(AccessedBytesMap[Offset], Size); + + // Known bytes might increase. + computeKnownDerefBytesFromAccessedMap(); + } + /// Equality for DerefState. bool operator==(const DerefState &R) { return this->DerefBytesState == R.DerefBytesState && diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 88c2ef787ad81..610668adcfa55 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -175,6 +175,7 @@ class LibCallSimplifier { Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B); Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B); + Value *optimizeMemCCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemPCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B); diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h index 4614007a64581..ba0f86c452637 100644 --- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h +++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h @@ -21,6 +21,7 @@ using namespace llvm; extern cl::opt EnablePGSO; extern cl::opt PGSOLargeWorkingSetSizeOnly; +extern cl::opt PGSOIRPassOrTestOnly; extern cl::opt PGSOColdCodeOnly; extern cl::opt ForcePGSO; extern cl::opt PgsoCutoffInstrProf; @@ -33,9 +34,15 @@ class BlockFrequencyInfo; class Function; class ProfileSummaryInfo; +enum class PGSOQueryType { + IRPass, // A query call from an IR-level transform pass. + Test, // A query call from a unit test. + Other, // Others. +}; + template bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI, - BFIT *BFI) { + BFIT *BFI, PGSOQueryType QueryType) { assert(F); if (!PSI || !BFI || !PSI->hasProfileSummary()) return false; @@ -43,6 +50,11 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI, return true; if (!EnablePGSO) return false; + // Temporarily enable size optimizations only for the IR pass or test query + // sites for gradual commit/rollout. This is to be removed later. + if (PGSOIRPassOrTestOnly && !(QueryType == PGSOQueryType::IRPass || + QueryType == PGSOQueryType::Test)) + return false; if (PGSOColdCodeOnly || (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) { // Even if the working set size isn't large, size-optimize cold code. @@ -55,7 +67,7 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI, template bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI, - BFIT *BFI) { + BFIT *BFI, PGSOQueryType QueryType) { assert(BB); if (!PSI || !BFI || !PSI->hasProfileSummary()) return false; @@ -63,6 +75,11 @@ bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI, return true; if (!EnablePGSO) return false; + // Temporarily enable size optimizations only for the IR pass or test query + // sites for gradual commit/rollout. This is to be removed later. 
+ if (PGSOIRPassOrTestOnly && !(QueryType == PGSOQueryType::IRPass || + QueryType == PGSOQueryType::Test)) + return false; if (PGSOColdCodeOnly || (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) { // Even if the working set size isn't large, size-optimize cold code. @@ -73,15 +90,17 @@ bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI, BB, PSI, BFI); } -/// Returns true if function \p F is suggested to be size-optimized base on the +/// Returns true if function \p F is suggested to be size-optimized based on the /// profile. bool shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI); + BlockFrequencyInfo *BFI, + PGSOQueryType QueryType = PGSOQueryType::Other); -/// Returns true if basic block \p BB is suggested to be size-optimized base -/// on the profile. +/// Returns true if basic block \p BB is suggested to be size-optimized based on +/// the profile. bool shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI); + BlockFrequencyInfo *BFI, + PGSOQueryType QueryType = PGSOQueryType::Other); } // end namespace llvm diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 7bd237b9ad537..ffba65b5ed5ee 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -146,69 +147,83 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; /// instruction. This is essentially never taken. static const uint32_t IH_NONTAKEN_WEIGHT = 1; -/// Add \p BB to PostDominatedByUnreachable set if applicable. -void -BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) { - const Instruction *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) { - if (isa(TI) || - // If this block is terminated by a call to - // @llvm.experimental.deoptimize then treat it like an unreachable since - // the @llvm.experimental.deoptimize call is expected to practically - // never execute. - BB->getTerminatingDeoptimizeCall()) - PostDominatedByUnreachable.insert(BB); - return; - } +static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT, + SmallVectorImpl &WorkList, + SmallPtrSetImpl &TargetSet) { + SmallVector Descendants; + SmallPtrSet NewItems; + + PDT->getDescendants(const_cast(BB), Descendants); + for (auto *BB : Descendants) + if (TargetSet.insert(BB).second) + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (!TargetSet.count(*PI)) + NewItems.insert(*PI); + WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end()); +} - // If the terminator is an InvokeInst, check only the normal destination block - // as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast(TI)) { - if (PostDominatedByUnreachable.count(II->getNormalDest())) - PostDominatedByUnreachable.insert(BB); - return; +/// Compute a set of basic blocks that are post-dominated by unreachables. 
+void BranchProbabilityInfo::computePostDominatedByUnreachable( + const Function &F, PostDominatorTree *PDT) { + SmallVector WorkList; + for (auto &BB : F) { + const Instruction *TI = BB.getTerminator(); + if (TI->getNumSuccessors() == 0) { + if (isa(TI) || + // If this block is terminated by a call to + // @llvm.experimental.deoptimize then treat it like an unreachable + // since the @llvm.experimental.deoptimize call is expected to + // practically never execute. + BB.getTerminatingDeoptimizeCall()) + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable); + } } - for (auto *I : successors(BB)) - // If any of successor is not post dominated then BB is also not. - if (!PostDominatedByUnreachable.count(I)) - return; - - PostDominatedByUnreachable.insert(BB); + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); + if (PostDominatedByUnreachable.count(BB)) + continue; + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast(BB->getTerminator())) { + if (PostDominatedByUnreachable.count(II->getNormalDest())) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); + } + // If all the successors are unreachable, BB is unreachable as well. + else if (!successors(BB).empty() && + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { + return PostDominatedByUnreachable.count(Succ); + })) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); + } } -/// Add \p BB to PostDominatedByColdCall set if applicable. -void -BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { - assert(!PostDominatedByColdCall.count(BB)); - const Instruction *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) - return; +/// compute a set of basic blocks that are post-dominated by ColdCalls. +void BranchProbabilityInfo::computePostDominatedByColdCall( + const Function &F, PostDominatorTree *PDT) { + SmallVector WorkList; + for (auto &BB : F) + for (auto &I : BB) + if (const CallInst *CI = dyn_cast(&I)) + if (CI->hasFnAttr(Attribute::Cold)) + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall); - // If all of successor are post dominated then BB is also done. - if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) { - return PostDominatedByColdCall.count(SuccBB); - })) { - PostDominatedByColdCall.insert(BB); - return; - } + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); - // If the terminator is an InvokeInst, check only the normal destination - // block as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast(TI)) - if (PostDominatedByColdCall.count(II->getNormalDest())) { - PostDominatedByColdCall.insert(BB); - return; + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast(BB->getTerminator())) { + if (PostDominatedByColdCall.count(II->getNormalDest())) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); } - - // Otherwise, if the block itself contains a cold function, add it to the - // set of blocks post-dominated by a cold call. - for (auto &I : *BB) - if (const CallInst *CI = dyn_cast(&I)) - if (CI->hasFnAttr(Attribute::Cold)) { - PostDominatedByColdCall.insert(BB); - return; - } + // If all of successor are post dominated then BB is also done. 
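The seed test used by computePostDominatedByUnreachable can be read in isolation as the predicate below; seedsUnreachableSet is a hypothetical name for this sketch, but the two conditions are exactly the ones in the hunk:

```cpp
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// A block seeds the "post-dominated by unreachable" set when it has no
// successors and either ends in an actual 'unreachable' or ends in a call
// to @llvm.experimental.deoptimize, which is expected to practically never
// execute.
static bool seedsUnreachableSet(const BasicBlock &BB) {
  const Instruction *TI = BB.getTerminator();
  if (TI->getNumSuccessors() != 0)
    return false;
  return isa<UnreachableInst>(TI) || BB.getTerminatingDeoptimizeCall();
}
```

From each seed, UpdatePDTWorklist then marks every post-dominator-tree descendant at once, which is what makes the new formulation cheaper than the old per-block scan.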
+ else if (!successors(BB).empty() && + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { + return PostDominatedByColdCall.count(Succ); + })) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); + } } /// Calculate edge weights for successors lead to unreachable. @@ -983,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, LLVM_DEBUG(dbgs() << "\n"); } + std::unique_ptr PDT = + std::make_unique(const_cast(F)); + computePostDominatedByUnreachable(F, PDT.get()); + computePostDominatedByColdCall(F, PDT.get()); + // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. for (auto BB : post_order(&F.getEntryBlock())) { LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); - updatePostDominatedByUnreachable(BB); - updatePostDominatedByColdCall(BB); // If there is no at least two successors, no sense to set probability. if (BB->getTerminator()->getNumSuccessors() < 2) continue; diff --git a/llvm/lib/Analysis/DDG.cpp b/llvm/lib/Analysis/DDG.cpp index 82ccea06f28b6..90ce13e6f6503 100644 --- a/llvm/lib/Analysis/DDG.cpp +++ b/llvm/lib/Analysis/DDG.cpp @@ -9,7 +9,9 @@ // The implementation for the data dependence graph. //===----------------------------------------------------------------------===// #include "llvm/Analysis/DDG.h" +#include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -179,19 +181,28 @@ using BasicBlockListType = SmallVector; DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D) : DependenceGraphInfo(F.getName().str(), D) { + // Put the basic blocks in program order for correct dependence + // directions. BasicBlockListType BBList; - for (auto &BB : F.getBasicBlockList()) - BBList.push_back(&BB); + for (auto &SCC : make_range(scc_begin(&F), scc_end(&F))) + for (BasicBlock * BB : SCC) + BBList.push_back(BB); + std::reverse(BBList.begin(), BBList.end()); DDGBuilder(*this, D, BBList).populate(); } -DataDependenceGraph::DataDependenceGraph(const Loop &L, DependenceInfo &D) +DataDependenceGraph::DataDependenceGraph(Loop &L, LoopInfo &LI, + DependenceInfo &D) : DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." + L.getHeader()->getName()) .str(), D) { + // Put the basic blocks in program order for correct dependence + // directions. + LoopBlocksDFS DFS(&L); + DFS.perform(&LI); BasicBlockListType BBList; - for (BasicBlock *BB : L.blocks()) + for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) BBList.push_back(BB); DDGBuilder(*this, D, BBList).populate(); } @@ -259,7 +270,7 @@ DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR) { Function *F = L.getHeader()->getParent(); DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); - return std::make_unique(L, DI); + return std::make_unique(L, AR.LI, DI); } AnalysisKey DDGAnalysis::Key; diff --git a/llvm/lib/Analysis/DependenceGraphBuilder.cpp b/llvm/lib/Analysis/DependenceGraphBuilder.cpp index 115f5d6e814b8..98bb09d792b23 100644 --- a/llvm/lib/Analysis/DependenceGraphBuilder.cpp +++ b/llvm/lib/Analysis/DependenceGraphBuilder.cpp @@ -353,5 +353,34 @@ void AbstractDependenceGraphBuilder::createMemoryDependencyEdges() { } } +template +void AbstractDependenceGraphBuilder::sortNodesTopologically() { + + // If we don't create pi-blocks, then we may not have a DAG. 
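A condensed sketch of the ordering trick used by the DataDependenceGraph constructor above, assuming the standard scc_iterator API; the helper name is ours:

```cpp
#include "llvm/ADT/SCCIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include <algorithm>
#include <vector>

using namespace llvm;

// scc_iterator emits the SCCs of the CFG in post-order, so flattening them
// and reversing the list yields a reverse post-order over the SCC DAG, an
// order consistent with control flow in which the blocks of each SCC
// (loop) stay adjacent. That is what gives correct dependence directions.
static std::vector<BasicBlock *> blocksInProgramOrder(Function &F) {
  std::vector<BasicBlock *> BBList;
  for (scc_iterator<Function *> I = scc_begin(&F); !I.isAtEnd(); ++I)
    for (BasicBlock *BB : *I)
      BBList.push_back(BB);
  std::reverse(BBList.begin(), BBList.end());
  return BBList;
}
```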
+ if (!shouldCreatePiBlocks()) + return; + + SmallVector NodesInPO; + using NodeKind = typename NodeType::NodeKind; + for (NodeType *N : post_order(&Graph)) { + if (N->getKind() == NodeKind::PiBlock) { + // Put members of the pi-block right after the pi-block itself, for + // convenience. + const NodeListType &PiBlockMembers = getNodesInPiBlock(*N); + NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(), + PiBlockMembers.end()); + } + NodesInPO.push_back(N); + } + + size_t OldSize = Graph.Nodes.size(); + Graph.Nodes.clear(); + for (NodeType *N : reverse(NodesInPO)) + Graph.Nodes.push_back(N); + if (Graph.Nodes.size() != OldSize) + assert(false && + "Expected the number of nodes to stay the same after the sort"); +} + template class llvm::AbstractDependenceGraphBuilder; template class llvm::DependenceGraphInfo; diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index ce99226087fa2..3c33aa973cdd6 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -721,6 +721,13 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence( if (I->getParent()->getTerminator() == I) return false; + // Do not try to sink an instruction multiple times (if multiple operands + // are first order recurrences). + // TODO: We can support this case, by sinking the instruction after the + // 'deepest' previous instruction. + if (SinkAfter.find(I) != SinkAfter.end()) + return false; + if (DT->dominates(Previous, I)) // We already are good w/o sinking. return true; diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 1ba03de69890b..55ce940bc3a5e 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -51,7 +51,7 @@ static cl::opt InlineThreshold( cl::desc("Control the amount of inlining to perform (default = 225)")); static cl::opt HintThreshold( - "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, + "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with inline hint")); static cl::opt @@ -63,7 +63,7 @@ static cl::opt // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. static cl::opt ColdThreshold( - "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, + "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with cold attribute")); static cl::opt @@ -149,6 +149,9 @@ class CallAnalyzer : public InstVisitor { bool HasUninlineableIntrinsic = false; bool InitsVargArgs = false; + /// Attempt to evaluate indirect calls to boost its inline cost. + bool BoostIndirectCalls; + /// Number of bytes allocated statically by the callee. 
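The core of sortNodesTopologically is the classic "reverse a post-order walk" argument. A minimal self-contained model of just that core follows; the pi-block handling in the real code only splices member nodes next to their pi-block before the reverse, so the ordering argument is unchanged:

```cpp
#include <algorithm>
#include <vector>

// Toy DAG node for the sketch; the real code walks graph nodes via
// llvm::post_order.
struct Node {
  std::vector<Node *> Succs;
  bool Visited = false;
};

static void postOrder(Node *N, std::vector<Node *> &Out) {
  N->Visited = true;
  for (Node *S : N->Succs)
    if (!S->Visited)
      postOrder(S, Out);
  Out.push_back(N); // a node is emitted only after all of its successors
}

// Reversing a post-order walk of a DAG yields a topological order.
static std::vector<Node *> topologicalOrder(Node *Entry) {
  std::vector<Node *> PO;
  postOrder(Entry, PO);
  std::reverse(PO.begin(), PO.end());
  return PO;
}
```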
uint64_t AllocatedSize = 0; unsigned NumInstructions = 0; @@ -295,13 +298,14 @@ class CallAnalyzer : public InstVisitor { std::function &GetAssumptionCache, Optional> &GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, - Function &Callee, CallBase &Call, const InlineParams &Params) + Function &Callee, CallBase &Call, const InlineParams &Params, + bool BoostIndirect = true) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold), ComputeFullInlineCost(OptComputeFullInlineCost || Params.ComputeFullInlineCost || ORE), - EnableLoadElimination(true) {} + BoostIndirectCalls(BoostIndirect), EnableLoadElimination(true) {} InlineResult analyzeCall(CallBase &Call); @@ -423,9 +427,9 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { Operands.push_back(GEP.getOperand(0)); for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) if (Constant *SimpleOp = SimplifiedValues.lookup(*I)) - Operands.push_back(SimpleOp); - else - Operands.push_back(*I); + Operands.push_back(SimpleOp); + else + Operands.push_back(*I); return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands); } @@ -1239,97 +1243,93 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) { if (isa(Call) && cast(Call).cannotDuplicate()) ContainsNoDuplicateCall = true; - if (Function *F = Call.getCalledFunction()) { - // When we have a concrete function, first try to simplify it directly. - if (simplifyCallSite(F, Call)) - return true; - - // Next check if it is an intrinsic we know about. - // FIXME: Lift this into part of the InstVisitor. - if (IntrinsicInst *II = dyn_cast(&Call)) { - switch (II->getIntrinsicID()) { - default: - if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) - disableLoadElimination(); - return Base::visitCallBase(Call); - - case Intrinsic::load_relative: - // This is normally lowered to 4 LLVM instructions. - addCost(3 * InlineConstants::InstrCost); - return false; + Value *Callee = Call.getCalledOperand(); + Function *F = dyn_cast_or_null(Callee); + bool IsIndirectCall = !F; + if (IsIndirectCall) { + // Check if this happens to be an indirect function call to a known function + // in this inline context. If not, we've done all we can. + F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); + if (!F) { + // Pay the price of the argument setup. We account for the average 1 + // instruction per call argument setup here. + addCost(Call.arg_size() * InlineConstants::InstrCost); - case Intrinsic::memset: - case Intrinsic::memcpy: - case Intrinsic::memmove: + if (!Call.onlyReadsMemory()) disableLoadElimination(); - // SROA can usually chew through these intrinsics, but they aren't free. - return false; - case Intrinsic::icall_branch_funnel: - case Intrinsic::localescape: - HasUninlineableIntrinsic = true; - return false; - case Intrinsic::vastart: - InitsVargArgs = true; - return false; - } + return Base::visitCallBase(Call); } + } - if (F == Call.getFunction()) { - // This flag will fully abort the analysis, so don't bother with anything - // else. - IsRecursiveCall = true; - return false; - } + assert(F && "Expected a call to a known function"); - if (TTI.isLoweredToCall(F)) { - // We account for the average 1 instruction per call argument setup - // here. - addCost(Call.arg_size() * InlineConstants::InstrCost); + // When we have a concrete function, first try to simplify it directly. 
+ if (simplifyCallSite(F, Call)) + return true; - // Everything other than inline ASM will also have a significant cost - // merely from making the call. - if (!isa(Call.getCalledValue())) - addCost(InlineConstants::CallPenalty); - } + // Next check if it is an intrinsic we know about. + // FIXME: Lift this into part of the InstVisitor. + if (IntrinsicInst *II = dyn_cast(&Call)) { + switch (II->getIntrinsicID()) { + default: + if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) + disableLoadElimination(); + return Base::visitCallBase(Call); + + case Intrinsic::load_relative: + // This is normally lowered to 4 LLVM instructions. + addCost(3 * InlineConstants::InstrCost); + return false; - if (!Call.onlyReadsMemory()) + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: disableLoadElimination(); - return Base::visitCallBase(Call); + // SROA can usually chew through these intrinsics, but they aren't free. + return false; + case Intrinsic::icall_branch_funnel: + case Intrinsic::localescape: + HasUninlineableIntrinsic = true; + return false; + case Intrinsic::vastart: + InitsVargArgs = true; + return false; + } } - // Otherwise we're in a very special case -- an indirect function call. See - // if we can be particularly clever about this. - Value *Callee = Call.getCalledValue(); - - // First, pay the price of the argument setup. We account for the average - // 1 instruction per call argument setup here. - addCost(Call.arg_size() * InlineConstants::InstrCost); - - // Next, check if this happens to be an indirect function call to a known - // function in this inline context. If not, we've done all we can. - Function *F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); - if (!F) { - if (!Call.onlyReadsMemory()) - disableLoadElimination(); - return Base::visitCallBase(Call); + if (F == Call.getFunction()) { + // This flag will fully abort the analysis, so don't bother with anything + // else. + IsRecursiveCall = true; + return false; } - // If we have a constant that we are calling as a function, we can peer - // through it and see the function target. This happens not infrequently - // during devirtualization and so we want to give it a hefty bonus for - // inlining, but cap that bonus in the event that inlining wouldn't pan - // out. Pretend to inline the function, with a custom threshold. - auto IndirectCallParams = Params; - IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; - CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call, - IndirectCallParams); - if (CA.analyzeCall(Call)) { - // We were able to inline the indirect call! Subtract the cost from the - // threshold to get the bonus we want to apply, but don't go below zero. - Cost -= std::max(0, CA.getThreshold() - CA.getCost()); + if (TTI.isLoweredToCall(F)) { + // We account for the average 1 instruction per call argument setup here. + addCost(Call.arg_size() * InlineConstants::InstrCost); + + // If we have a constant that we are calling as a function, we can peer + // through it and see the function target. This happens not infrequently + // during devirtualization and so we want to give it a hefty bonus for + // inlining, but cap that bonus in the event that inlining wouldn't pan out. + // Pretend to inline the function, with a custom threshold. 
+ if (IsIndirectCall && BoostIndirectCalls) { + auto IndirectCallParams = Params; + IndirectCallParams.DefaultThreshold = + InlineConstants::IndirectCallThreshold; + CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call, + IndirectCallParams, false); + if (CA.analyzeCall(Call)) { + // We were able to inline the indirect call! Subtract the cost from the + // threshold to get the bonus we want to apply, but don't go below zero. + Cost -= std::max(0, CA.getThreshold() - CA.getCost()); + } + } else + // Otherwise simply add the cost for merely making the call. + addCost(InlineConstants::CallPenalty); } - if (!F->onlyReadsMemory()) + if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory()))) disableLoadElimination(); return Base::visitCallBase(Call); } @@ -1494,7 +1494,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1; int64_t SwitchCost = - ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; + ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; addCost(SwitchCost, (int64_t)CostUpperBound); return false; diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index d997acb365c47..7942cb09e84c9 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5086,6 +5086,11 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, return Op0; } break; + case Intrinsic::copysign: + // copysign X, X --> X + if (Op0 == Op1) + return Op0; + break; case Intrinsic::maxnum: case Intrinsic::minnum: case Intrinsic::maximum: diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index bad2de9e5f5e0..78ad5859de4c6 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -136,12 +136,9 @@ namespace { /// A callback value handle updates the cache when values are erased. class LazyValueInfoCache; struct LVIValueHandle final : public CallbackVH { - // Needs to access getValPtr(), which is protected. - friend struct DenseMapInfo; - LazyValueInfoCache *Parent; - LVIValueHandle(Value *V, LazyValueInfoCache *P) + LVIValueHandle(Value *V, LazyValueInfoCache *P = nullptr) : CallbackVH(V), Parent(P) { } void deleted() override; @@ -155,89 +152,63 @@ namespace { /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. class LazyValueInfoCache { - /// This is all of the cached block information for exactly one Value*. - /// The entries are sorted by the BasicBlock* of the - /// entries, allowing us to do a lookup with a binary search. - /// Over-defined lattice values are recorded in OverDefinedCache to reduce - /// memory overhead. - struct ValueCacheEntryTy { - ValueCacheEntryTy(Value *V, LazyValueInfoCache *P) : Handle(V, P) {} - LVIValueHandle Handle; - SmallDenseMap, ValueLatticeElement, 4> BlockVals; + /// This is all of the cached information for one basic block. It contains + /// the per-value lattice elements, as well as a separate set for + /// overdefined values to reduce memory usage. + struct BlockCacheEntryTy { + SmallDenseMap, ValueLatticeElement, 4> LatticeElements; + SmallDenseSet, 4> OverDefined; }; - /// This tracks, on a per-block basis, the set of values that are - /// over-defined at the end of that block. 
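The arithmetic of the indirect-call boost above is easy to lose in the surrounding restructuring; here is a toy model with arbitrary example numbers (the function name and values are ours):

```cpp
#include <algorithm>
#include <cassert>

// If the nested CallAnalyzer succeeds on the callee discovered behind an
// indirect call, the unused part of its threshold becomes a bonus on the
// caller's cost, clamped at zero so a barely passing candidate cannot
// make the call cheaper.
static int boostedCost(int Cost, bool NestedAnalysisSucceeded,
                       int NestedThreshold, int NestedCost) {
  if (NestedAnalysisSucceeded)
    Cost -= std::max(0, NestedThreshold - NestedCost);
  return Cost;
}

int main() {
  assert(boostedCost(100, true, 660, 200) == -360); // large bonus applied
  assert(boostedCost(100, true, 660, 700) == 100);  // clamped at zero
  assert(boostedCost(100, false, 660, 200) == 100); // no boost requested
  return 0;
}
```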
- typedef DenseMap, SmallPtrSet> - OverDefinedCacheTy; - /// Keep track of all blocks that we have ever seen, so we - /// don't spend time removing unused blocks from our caches. - DenseSet > SeenBlocks; - - /// This is all of the cached information for all values, - /// mapped from Value* to key information. - DenseMap> ValueCache; - OverDefinedCacheTy OverDefinedCache; - + /// Cached information per basic block. + DenseMap, BlockCacheEntryTy> BlockCache; + /// Set of value handles used to erase values from the cache on deletion. + DenseSet> ValueHandles; public: void insertResult(Value *Val, BasicBlock *BB, const ValueLatticeElement &Result) { - SeenBlocks.insert(BB); - + auto &CacheEntry = BlockCache.try_emplace(BB).first->second; // Insert over-defined values into their own cache to reduce memory // overhead. if (Result.isOverdefined()) - OverDefinedCache[BB].insert(Val); - else { - auto It = ValueCache.find_as(Val); - if (It == ValueCache.end()) { - ValueCache[Val] = std::make_unique(Val, this); - It = ValueCache.find_as(Val); - assert(It != ValueCache.end() && "Val was just added to the map!"); - } - It->second->BlockVals[BB] = Result; - } - } - - bool isOverdefined(Value *V, BasicBlock *BB) const { - auto ODI = OverDefinedCache.find(BB); - - if (ODI == OverDefinedCache.end()) - return false; + CacheEntry.OverDefined.insert(Val); + else + CacheEntry.LatticeElements.insert({ Val, Result }); - return ODI->second.count(V); + auto HandleIt = ValueHandles.find_as(Val); + if (HandleIt == ValueHandles.end()) + ValueHandles.insert({ Val, this }); } bool hasCachedValueInfo(Value *V, BasicBlock *BB) const { - if (isOverdefined(V, BB)) - return true; - - auto I = ValueCache.find_as(V); - if (I == ValueCache.end()) + auto It = BlockCache.find(BB); + if (It == BlockCache.end()) return false; - return I->second->BlockVals.count(BB); + return It->second.OverDefined.count(V) || + It->second.LatticeElements.count(V); } ValueLatticeElement getCachedValueInfo(Value *V, BasicBlock *BB) const { - if (isOverdefined(V, BB)) + auto It = BlockCache.find(BB); + if (It == BlockCache.end()) + return ValueLatticeElement(); + + if (It->second.OverDefined.count(V)) return ValueLatticeElement::getOverdefined(); - auto I = ValueCache.find_as(V); - if (I == ValueCache.end()) + auto LatticeIt = It->second.LatticeElements.find(V); + if (LatticeIt == It->second.LatticeElements.end()) return ValueLatticeElement(); - auto BBI = I->second->BlockVals.find(BB); - if (BBI == I->second->BlockVals.end()) - return ValueLatticeElement(); - return BBI->second; + + return LatticeIt->second; } /// clear - Empty the cache. void clear() { - SeenBlocks.clear(); - ValueCache.clear(); - OverDefinedCache.clear(); + BlockCache.clear(); + ValueHandles.clear(); } /// Inform the cache that a given value has been deleted. @@ -251,23 +222,18 @@ namespace { /// OldSucc might have (unless also overdefined in NewSucc). This just /// flushes elements from the cache and does not add any. void threadEdgeImpl(BasicBlock *OldSucc,BasicBlock *NewSucc); - - friend struct LVIValueHandle; }; } void LazyValueInfoCache::eraseValue(Value *V) { - for (auto I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E;) { - // Copy and increment the iterator immediately so we can erase behind - // ourselves. 
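A standalone model of the reorganized LVI cache, using illustrative names and std containers in place of LLVM's dense ones: information is now grouped per block, with overdefined values kept in a side set to keep entries small.

```cpp
#include <map>
#include <set>
#include <string>

struct LatticeElement {}; // stand-in for ValueLatticeElement

struct BlockCacheEntry {
  std::map<std::string, LatticeElement> LatticeElements;
  std::set<std::string> OverDefined;
};

using BlockCache = std::map<int, BlockCacheEntry>;

// Erasing a dead block is now a single map erase...
static void eraseBlock(BlockCache &Cache, int BB) { Cache.erase(BB); }

// ...and erasing a value walks each block entry once, instead of walking a
// value-indexed cache and a separate block-indexed overdefined cache.
static void eraseValue(BlockCache &Cache, const std::string &V) {
  for (auto &Pair : Cache) {
    Pair.second.LatticeElements.erase(V);
    Pair.second.OverDefined.erase(V);
  }
}
```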
- auto Iter = I++; - SmallPtrSetImpl &ValueSet = Iter->second; - ValueSet.erase(V); - if (ValueSet.empty()) - OverDefinedCache.erase(Iter); + for (auto &Pair : BlockCache) { + Pair.second.LatticeElements.erase(V); + Pair.second.OverDefined.erase(V); } - ValueCache.erase(V); + auto HandleIt = ValueHandles.find_as(V); + if (HandleIt != ValueHandles.end()) + ValueHandles.erase(HandleIt); } void LVIValueHandle::deleted() { @@ -277,18 +243,7 @@ void LVIValueHandle::deleted() { } void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { - // Shortcut if we have never seen this block. - DenseSet >::iterator I = SeenBlocks.find(BB); - if (I == SeenBlocks.end()) - return; - SeenBlocks.erase(I); - - auto ODI = OverDefinedCache.find(BB); - if (ODI != OverDefinedCache.end()) - OverDefinedCache.erase(ODI); - - for (auto &I : ValueCache) - I.second->BlockVals.erase(BB); + BlockCache.erase(BB); } void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, @@ -306,10 +261,11 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, std::vector worklist; worklist.push_back(OldSucc); - auto I = OverDefinedCache.find(OldSucc); - if (I == OverDefinedCache.end()) + auto I = BlockCache.find(OldSucc); + if (I == BlockCache.end() || I->second.OverDefined.empty()) return; // Nothing to process here. - SmallVector ValsToClear(I->second.begin(), I->second.end()); + SmallVector ValsToClear(I->second.OverDefined.begin(), + I->second.OverDefined.end()); // Use a worklist to perform a depth-first search of OldSucc's successors. // NOTE: We do not need a visited list since any blocks we have already @@ -323,10 +279,10 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, if (ToUpdate == NewSucc) continue; // If a value was marked overdefined in OldSucc, and is here too... - auto OI = OverDefinedCache.find(ToUpdate); - if (OI == OverDefinedCache.end()) + auto OI = BlockCache.find(ToUpdate); + if (OI == BlockCache.end() || OI->second.OverDefined.empty()) continue; - SmallPtrSetImpl &ValueSet = OI->second; + auto &ValueSet = OI->second.OverDefined; bool changed = false; for (Value *V : ValsToClear) { @@ -336,11 +292,6 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, // If we removed anything, then we potentially need to update // blocks successors too. changed = true; - - if (ValueSet.empty()) { - OverDefinedCache.erase(OI); - break; - } } if (!changed) continue; diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 66c43cb451118..d635afb0a299c 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -5717,10 +5717,11 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT); - if (Known.One != ~Known.Zero + 1) - ConservativeResult = - ConservativeResult.intersectWith( - ConstantRange(Known.One, ~Known.Zero + 1), RangeType); + // If Known does not result in full-set, intersect with it. 
+ if (Known.getMinValue() != Known.getMaxValue() + 1) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(Known.getMinValue(), Known.getMaxValue() + 1), + RangeType); } else { assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && "generalize as needed!"); @@ -12040,6 +12041,12 @@ ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, AM.getResult(F)); } +PreservedAnalyses +ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { + AM.getResult(F).verify(); + return PreservedAnalyses::all(); +} + PreservedAnalyses ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { AM.getResult(F).print(OS); diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp index 067283d38b66f..a331b95e818b2 100644 --- a/llvm/lib/Analysis/VFABIDemangling.cpp +++ b/llvm/lib/Analysis/VFABIDemangling.cpp @@ -402,8 +402,8 @@ Optional VFABI::tryDemangleForVFABI(StringRef MangledName) { assert(Parameters.back().ParamKind == VFParamKind::GlobalPredicate && "The global predicate must be the last parameter"); - const VFShape Shape({VF, IsScalable, ISA, Parameters}); - return VFInfo({Shape, ScalarName, VectorName}); + const VFShape Shape({VF, IsScalable, Parameters}); + return VFInfo({Shape, ScalarName, VectorName, ISA}); } VFParamKind VFABI::getVFParamKindFromString(const StringRef Token) { diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 51d92cca214ba..f46bae77ba269 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -915,7 +915,7 @@ static void computeKnownBitsFromShiftOperator( // If the shift amount could be greater than or equal to the bit-width of the // LHS, the value could be poison, but bail out because the check below is // expensive. TODO: Should we just carry on? - if ((~Known.Zero).uge(BitWidth)) { + if (Known.getMaxValue().uge(BitWidth)) { Known.resetAll(); return; } diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 44043bd582c6e..c45ab941a1428 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -1182,3 +1182,47 @@ void VFABI::getVectorVariantNames( VariantMappings.push_back(S); } } + +bool VFShape::hasValidParameterList() const { + for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams; + ++Pos) { + assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list."); + + switch (Parameters[Pos].ParamKind) { + default: // Nothing to check. + break; + case VFParamKind::OMP_Linear: + case VFParamKind::OMP_LinearRef: + case VFParamKind::OMP_LinearVal: + case VFParamKind::OMP_LinearUVal: + // Compile time linear steps must be non-zero. + if (Parameters[Pos].LinearStepOrPos == 0) + return false; + break; + case VFParamKind::OMP_LinearPos: + case VFParamKind::OMP_LinearRefPos: + case VFParamKind::OMP_LinearValPos: + case VFParamKind::OMP_LinearUValPos: + // The runtime linear step must be referring to some other + // parameters in the signature. + if (Parameters[Pos].LinearStepOrPos >= int(NumParams)) + return false; + // The linear step parameter must be marked as uniform. + if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind != + VFParamKind::OMP_Uniform) + return false; + // The linear step parameter can't point at itself. + if (Parameters[Pos].LinearStepOrPos == int(Pos)) + return false; + break; + case VFParamKind::GlobalPredicate: + // The global predicate must be the unique. Can be placed anywhere in the + // signature. 
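The ScalarEvolution change a few hunks up can be restated as a small helper; the function name is ours, and the KnownBits/ConstantRange calls are the ones used in the hunk:

```cpp
#include "llvm/IR/ConstantRange.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

// KnownBits bounds give the closed interval [MinValue, MaxValue], while
// ConstantRange is half-open, hence MaxValue + 1. When Min == Max + 1
// (after wrap) the known bits constrain nothing, so the caller skips the
// intersection rather than building a full-set range.
static ConstantRange rangeFromKnownBits(const KnownBits &Known) {
  APInt Lo = Known.getMinValue();
  APInt Hi = Known.getMaxValue() + 1;
  if (Lo == Hi)
    return ConstantRange::getFull(Known.getBitWidth());
  return ConstantRange(Lo, Hi);
}
```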
+ for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos) + if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate) + return false; + break; + } + } + return true; +} diff --git a/llvm/lib/BinaryFormat/XCOFF.cpp b/llvm/lib/BinaryFormat/XCOFF.cpp index 001b8077cd3d1..29ccbaea3584d 100644 --- a/llvm/lib/BinaryFormat/XCOFF.cpp +++ b/llvm/lib/BinaryFormat/XCOFF.cpp @@ -24,6 +24,10 @@ StringRef XCOFF::getMappingClassString(XCOFF::StorageMappingClass SMC) { return "BS"; case XCOFF::XMC_RO: return "RO"; + case XCOFF::XMC_UA: + return "UA"; + case XCOFF::XMC_TC: + return "TC"; default: report_fatal_error("Unhandled storage-mapping class."); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index c4d5b717b25df..84b86a71fa5fe 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -319,8 +319,10 @@ DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag) { Die.Owner = this; assert((UnitTag == dwarf::DW_TAG_compile_unit || + UnitTag == dwarf::DW_TAG_skeleton_unit || UnitTag == dwarf::DW_TAG_type_unit || - UnitTag == dwarf::DW_TAG_partial_unit) && "expected a unit TAG"); + UnitTag == dwarf::DW_TAG_partial_unit) && + "expected a unit TAG"); } void DIEValue::EmitValue(const AsmPrinter *AP) const { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 9578e01abdd47..4e90c10e3e9d8 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -52,10 +52,23 @@ using namespace llvm; +static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) { + + // According to DWARF Debugging Information Format Version 5, + // 3.1.2 Skeleton Compilation Unit Entries: + // "When generating a split DWARF object file (see Section 7.3.2 + // on page 187), the compilation unit in the .debug_info section + // is a "skeleton" compilation unit with the tag DW_TAG_skeleton_unit" + if (DW->getDwarfVersion() >= 5 && Kind == UnitKind::Skeleton) + return dwarf::DW_TAG_skeleton_unit; + + return dwarf::DW_TAG_compile_unit; +} + DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, - DwarfFile *DWU) - : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID) { + DwarfFile *DWU, UnitKind Kind) + : DwarfUnit(GetCompileUnitType(Kind, DW), Node, A, DW, DWU), UniqueID(UID) { insertDIE(Node, &getUnitDie()); MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin"); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 1b7ea2673ac09..8491d078ed899 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -40,6 +40,8 @@ class MCExpr; class MCSymbol; class MDNode; +enum class UnitKind { Skeleton, Full }; + class DwarfCompileUnit final : public DwarfUnit { /// A numeric ID unique among all CUs in the module unsigned UniqueID; @@ -104,7 +106,8 @@ class DwarfCompileUnit final : public DwarfUnit { public: DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, - DwarfDebug *DW, DwarfFile *DWU); + DwarfDebug *DW, DwarfFile *DWU, + UnitKind Kind = UnitKind::Full); bool hasRangeLists() const { return HasRangeLists; } unsigned getUniqueID() const { return UniqueID; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 551e8a2751b5f..09772537a97b8 100644 --- 
a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -617,6 +617,10 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, // Search for a loading value in forwarding registers. for (; I != MBB->rend(); ++I) { + // Skip bundle headers. + if (I->isBundle()) + continue; + // If the next instruction is a call we can not interpret parameter's // forwarding registers or we finished the interpretation of all parameters. if (I->isCall()) @@ -1169,7 +1173,7 @@ void DwarfDebug::finalizeModuleInfo() { auto *CUNode = cast(P.first); // If compile Unit has macros, emit "DW_AT_macro_info" attribute. - if (CUNode->getMacros()) + if (CUNode->getMacros() && !useSplitDwarf()) U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info, U.getMacroLabelBegin(), TLOF.getDwarfMacinfoSection()->getBeginSymbol()); @@ -1208,10 +1212,10 @@ void DwarfDebug::endModule() { emitDebugStr(); if (useSplitDwarf()) - // Handles debug_loc.dwo / debug_loclists.dwo section emission + // Emit debug_loc.dwo/debug_loclists.dwo section. emitDebugLocDWO(); else - // Handles debug_loc / debug_loclists section emission + // Emit debug_loc/debug_loclists section. emitDebugLoc(); // Corresponding abbreviations into a abbrev section. @@ -1227,8 +1231,12 @@ void DwarfDebug::endModule() { // Emit info into a debug ranges section. emitDebugRanges(); + if (useSplitDwarf()) + // Emit info into a debug macinfo.dwo section. + emitDebugMacinfoDWO(); + else // Emit info into a debug macinfo section. - emitDebugMacinfo(); + emitDebugMacinfo(); if (useSplitDwarf()) { emitDebugStrDWO(); @@ -2783,6 +2791,24 @@ void DwarfDebug::emitDebugMacinfo() { } } +void DwarfDebug::emitDebugMacinfoDWO() { + for (const auto &P : CUMap) { + auto &TheCU = *P.second; + auto *SkCU = TheCU.getSkeleton(); + DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; + auto *CUNode = cast(P.first); + DIMacroNodeArray Macros = CUNode->getMacros(); + if (Macros.empty()) + continue; + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfMacinfoDWOSection()); + Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); + handleMacroNodes(Macros, U); + Asm->OutStreamer->AddComment("End Of Macro List Mark"); + Asm->emitInt8(0); + } +} + // DWARF5 Experimental Separate Dwarf emitters. void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, @@ -2799,7 +2825,8 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { auto OwnedUnit = std::make_unique( - CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); + CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder, + UnitKind::Skeleton); DwarfCompileUnit &NewCU = *OwnedUnit; NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 85016074e2519..03949dbddea69 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -508,6 +508,8 @@ class DwarfDebug : public DebugHandlerBase { /// Emit macros into a debug macinfo section. void emitDebugMacinfo(); + /// Emit macros into a debug macinfo.dwo section. 
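A condensed restatement of GetCompileUnitType from the DwarfCompileUnit hunk above, with IsSkeleton and DwarfVersion standing in for the UnitKind argument and the DwarfDebug query:

```cpp
#include "llvm/BinaryFormat/Dwarf.h"

using namespace llvm;

// Only DWARF v5 split-DWARF skeleton CUs get the new DW_TAG_skeleton_unit
// tag (DWARF 5, section 3.1.2); everything else, including pre-v5 split
// DWARF, keeps DW_TAG_compile_unit.
static dwarf::Tag unitTagFor(bool IsSkeleton, unsigned DwarfVersion) {
  return (DwarfVersion >= 5 && IsSkeleton) ? dwarf::DW_TAG_skeleton_unit
                                           : dwarf::DW_TAG_compile_unit;
}
```

The emitDebugMacinfoDWO path added here follows the same split: with Split DWARF the macro list is emitted into the macinfo.dwo section under the skeleton's label; otherwise the plain macinfo path is used.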
+ void emitDebugMacinfoDWO(); void emitMacro(DIMacro &M); void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U); void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 9d7fee1d5b389..86522a85427a5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -800,6 +800,15 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { if (!Name.empty()) addString(Buffer, dwarf::DW_AT_name, Name); + // If alignment is specified for a typedef , create and insert DW_AT_alignment + // attribute in DW_TAG_typedef DIE. + if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) { + uint32_t AlignInBytes = DTy->getAlignInBytes(); + if (AlignInBytes > 0) + addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + } + // Add size if non-zero (derived types might be zero-sized.) if (Size && Tag != dwarf::DW_TAG_pointer_type && Tag != dwarf::DW_TAG_ptr_to_member_type diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 8875568c5938f..c10c3f4d78634 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -163,6 +163,7 @@ add_llvm_component_library(LLVMCodeGen TargetRegisterInfo.cpp TargetSchedule.cpp TargetSubtargetInfo.cpp + TypePromotion.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp ValueTypes.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 85696ccc482a7..20fc67cc66ae7 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -105,6 +105,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeTailDuplicatePass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); + initializeTypePromotionPass(Registry); initializeUnpackMachineBundlesPass(Registry); initializeUnreachableBlockElimLegacyPassPass(Registry); initializeUnreachableMachineBlockElimPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 89f69bdf37e97..0689f8e4f0c30 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1054,7 +1054,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { // Collect all the relocate calls associated with a statepoint AllRelocateCalls.push_back(Relocate); - // We need atleast one base pointer relocation + one derived pointer + // We need at least one base pointer relocation + one derived pointer // relocation to mangle if (AllRelocateCalls.size() < 2) return false; diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 702e7e244bcec..8d9d48402b311 100644 --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -261,15 +261,25 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isRegMask()) - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) - if (MO.clobbersPhysReg(i)) { + if (MO.isRegMask()) { + auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) { + for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI) + if (!MO.clobbersPhysReg(*SRI)) + return false; + + return true; + }; + + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + if 
(ClobbersPhysRegAndSubRegs(i)) { DefIndices[i] = Count; KillIndices[i] = ~0u; KeepRegs.reset(i); Classes[i] = nullptr; RegRefs.erase(i); } + } + } if (!MO.isReg()) continue; Register Reg = MO.getReg(); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index bcf31e16142cf..6712ff5c732d8 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -74,12 +74,35 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { return false; Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); + + // Give up if either DstReg or SrcReg is a physical register. + if (Register::isPhysicalRegister(DstReg) || + Register::isPhysicalRegister(SrcReg)) + return false; + + // Give up the types don't match. LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(SrcReg); - // Simple Copy Propagation. - // a(sx) = COPY b(sx) -> Replace all uses of a with b. - if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) + // Give up if one has a valid LLT, but the other doesn't. + if (DstTy.isValid() != SrcTy.isValid()) + return false; + // Give up if the types don't match. + if (DstTy.isValid() && SrcTy.isValid() && DstTy != SrcTy) + return false; + + // Get the register banks and classes. + const RegisterBank *DstBank = MRI.getRegBankOrNull(DstReg); + const RegisterBank *SrcBank = MRI.getRegBankOrNull(SrcReg); + const TargetRegisterClass *DstRC = MRI.getRegClassOrNull(DstReg); + const TargetRegisterClass *SrcRC = MRI.getRegClassOrNull(SrcReg); + + // Replace if the register constraints match. + if ((SrcRC == DstRC) && (SrcBank == DstBank)) return true; + // Replace if DstReg has no constraints. + if (!DstBank && !DstRC) + return true; + return false; } void CombinerHelper::applyCombineCopy(MachineInstr &MI) { diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index d8bcc59c7658e..5e1d5d9b579b6 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1675,7 +1675,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_CONSTANT: { MachineOperand &SrcMO = MI.getOperand(1); LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits()); + unsigned ExtOpc = LI.getExtOpcodeForWideningConstant( + MRI.getType(MI.getOperand(0).getReg())); + assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT || + ExtOpc == TargetOpcode::G_ANYEXT) && + "Illegal Extend"); + const APInt &SrcVal = SrcMO.getCImm()->getValue(); + const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT) + ? 
SrcVal.sext(WideTy.getSizeInBits()) + : SrcVal.zext(WideTy.getSizeInBits()); Observer.changingInstr(MI); SrcMO.setCImm(ConstantInt::get(Ctx, Val)); @@ -2109,7 +2117,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { default: llvm_unreachable("Unexpected opcode"); case TargetOpcode::G_LOAD: - MIRBuilder.buildAnyExt(DstReg, TmpReg); + MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg); break; case TargetOpcode::G_SEXTLOAD: MIRBuilder.buildSExt(DstReg, TmpReg); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 70045512fae51..f897f9c7e20aa 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -685,6 +685,10 @@ bool LegalizerInfo::legalizeIntrinsic(MachineInstr &MI, return true; } +unsigned LegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const { + return SmallTy.isByteSized() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; +} + /// \pre Type indices of every opcode form a dense set starting from 0. void LegalizerInfo::verify(const MCInstrInfo &MII) const { #ifndef NDEBUG diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 4e54437947ff6..b3ca4c1d8020b 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -698,7 +698,7 @@ void ImplicitNullChecks::rewriteNullChecks( if (auto *DepMI = NC.getOnlyDependency()) { for (auto &MO : DepMI->operands()) { - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) + if (!MO.isReg() || !MO.getReg() || !MO.isDef() || MO.isDead()) continue; if (!NC.getNotNullSucc()->isLiveIn(MO.getReg())) NC.getNotNullSucc()->addLiveIn(MO.getReg()); diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 2408f18678e46..75d978472cf35 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -543,8 +543,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg, bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // Analyze instruction SmallVector, 8> Ops; - MIBundleOperands::VirtRegInfo RI = - MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); + VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg, &Ops); if (!RI.Reads) return false; @@ -782,7 +781,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, /// foldMemoryOperand - Try folding stack slot references in Ops into their /// instructions. /// -/// @param Ops Operand indices from analyzeVirtReg(). +/// @param Ops Operand indices from AnalyzeVirtRegInBundle(). /// @param LoadMI Load instruction to use instead of stack slot when non-null. /// @return True on success. bool InlineSpiller:: @@ -851,8 +850,7 @@ foldMemoryOperand(ArrayRef> Ops, // Skip non-Defs, including undef uses and internal reads. if (MO->isUse()) continue; - MIBundleOperands::PhysRegInfo RI = - MIBundleOperands(*FoldMI).analyzePhysReg(Reg, &TRI); + PhysRegInfo RI = AnalyzePhysRegInBundle(*FoldMI, Reg, &TRI); if (RI.FullyDefined) continue; // FoldMI does not define this physreg. Remove the LI segment. @@ -992,8 +990,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Analyze instruction. SmallVector, 8> Ops; - MIBundleOperands::VirtRegInfo RI = - MIBundleOperands(*MI).analyzeVirtReg(Reg, &Ops); + VirtRegInfo RI = AnalyzeVirtRegInBundle(*MI, Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. 
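The new LegalizerInfo default from the hunk above, restated on its own; the header paths are assumed for this revision of the tree, and the body is verbatim from the patch:

```cpp
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

// Default extension policy when widening a G_CONSTANT: byte-sized scalars
// (s8, s16, ...) widen by sign-extension, odd sizes such as s1 widen by
// zero-extension. Targets may override getExtOpcodeForWideningConstant to
// pick a different opcode, which widenScalar then honors.
static unsigned extOpcodeForWideningConstant(LLT SmallTy) {
  return SmallTy.isByteSized() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
}
```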
// This is usually the def slot, except for tied early clobbers. diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp index 7734f5e5ef707..04efa7bc35e96 100644 --- a/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -144,60 +144,6 @@ class LiveDebugValues : public MachineFunctionPass { using FragmentInfo = DIExpression::FragmentInfo; using OptFragmentInfo = Optional; - /// Storage for identifying a potentially inlined instance of a variable, - /// or a fragment thereof. - class DebugVariable { - const DILocalVariable *Variable; - OptFragmentInfo Fragment; - const DILocation *InlinedAt; - - /// Fragment that will overlap all other fragments. Used as default when - /// caller demands a fragment. - static const FragmentInfo DefaultFragment; - - public: - DebugVariable(const DILocalVariable *Var, OptFragmentInfo &&FragmentInfo, - const DILocation *InlinedAt) - : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {} - - DebugVariable(const DILocalVariable *Var, OptFragmentInfo &FragmentInfo, - const DILocation *InlinedAt) - : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {} - - DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr, - const DILocation *InlinedAt) - : DebugVariable(Var, DIExpr->getFragmentInfo(), InlinedAt) {} - - DebugVariable(const MachineInstr &MI) - : DebugVariable(MI.getDebugVariable(), - MI.getDebugExpression()->getFragmentInfo(), - MI.getDebugLoc()->getInlinedAt()) {} - - const DILocalVariable *getVar() const { return Variable; } - const OptFragmentInfo &getFragment() const { return Fragment; } - const DILocation *getInlinedAt() const { return InlinedAt; } - - const FragmentInfo getFragmentDefault() const { - return Fragment.getValueOr(DefaultFragment); - } - - static bool isFragmentDefault(FragmentInfo &F) { - return F == DefaultFragment; - } - - bool operator==(const DebugVariable &Other) const { - return std::tie(Variable, Fragment, InlinedAt) == - std::tie(Other.Variable, Other.Fragment, Other.InlinedAt); - } - - bool operator<(const DebugVariable &Other) const { - return std::tie(Variable, Fragment, InlinedAt) < - std::tie(Other.Variable, Other.Fragment, Other.InlinedAt); - } - }; - - friend struct llvm::DenseMapInfo; - /// A pair of debug variable and value location. struct VarLoc { // The location at which a spilled variable resides. It consists of a @@ -226,7 +172,9 @@ class LiveDebugValues : public MachineFunctionPass { RegisterKind, SpillLocKind, ImmediateKind, - EntryValueKind + EntryValueKind, + EntryValueBackupKind, + EntryValueCopyBackupKind } Kind = InvalidKind; /// The value location. Stored separately to avoid repeatedly @@ -241,14 +189,15 @@ class LiveDebugValues : public MachineFunctionPass { } Loc; VarLoc(const MachineInstr &MI, LexicalScopes &LS) - : Var(MI), Expr(MI.getDebugExpression()), MI(MI), - UVS(MI.getDebugLoc(), LS) { + : Var(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()), + Expr(MI.getDebugExpression()), MI(MI), UVS(MI.getDebugLoc(), LS) { static_assert((sizeof(Loc) == sizeof(uint64_t)), "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); if (int RegNo = isDbgValueDescribedByReg(MI)) { - Kind = MI.isDebugEntryValue() ? 
EntryValueKind : RegisterKind; + Kind = RegisterKind; Loc.RegNo = RegNo; } else if (MI.getOperand(0).isImm()) { Kind = ImmediateKind; @@ -260,17 +209,50 @@ class LiveDebugValues : public MachineFunctionPass { Kind = ImmediateKind; Loc.CImm = MI.getOperand(0).getCImm(); } - assert((Kind != ImmediateKind || !MI.isDebugEntryValue()) && - "entry values must be register locations"); + + // We create the debug entry values from the factory functions rather than + // from this ctor. + assert(Kind != EntryValueKind && !isEntryBackupLoc()); } /// Take the variable and machine-location in DBG_VALUE MI, and build an /// entry location using the given expression. static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS, - const DIExpression *EntryExpr) { + const DIExpression *EntryExpr, unsigned Reg) { VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); VL.Kind = EntryValueKind; VL.Expr = EntryExpr; + VL.Loc.RegNo = Reg; + return VL; + } + + /// Take the variable and machine-location from the DBG_VALUE (from the + /// function entry), and build an entry value backup location. The backup + /// location will turn into the normal location if the backup is valid at + /// the time of the primary location clobbering. + static VarLoc CreateEntryBackupLoc(const MachineInstr &MI, + LexicalScopes &LS, + const DIExpression *EntryExpr) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Kind = EntryValueBackupKind; + VL.Expr = EntryExpr; + return VL; + } + + /// Take the variable and machine-location from the DBG_VALUE (from the + /// function entry), and build a copy of an entry value backup location by + /// setting the register location to NewReg. + static VarLoc CreateEntryCopyBackupLoc(const MachineInstr &MI, + LexicalScopes &LS, + const DIExpression *EntryExpr, + unsigned NewReg) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Kind = EntryValueCopyBackupKind; + VL.Expr = EntryExpr; + VL.Loc.RegNo = NewReg; return VL; } @@ -309,8 +291,11 @@ class LiveDebugValues : public MachineFunctionPass { switch (Kind) { case EntryValueKind: // An entry value is a register location -- but with an updated - // expression. - return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, Expr); + // expression. The register location of such DBG_VALUE is always the one + // from the entry DBG_VALUE, it does not matter if the entry value was + // copied in to another register due to some optimizations. + return BuildMI(MF, DbgLoc, IID, Indirect, MI.getOperand(0).getReg(), + Var, Expr); case RegisterKind: // Register locations are like the source DBG_VALUE, but with the // register number from this VarLoc. @@ -329,8 +314,11 @@ class LiveDebugValues : public MachineFunctionPass { MachineOperand MO = MI.getOperand(0); return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr); } + case EntryValueBackupKind: + case EntryValueCopyBackupKind: case InvalidKind: - llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc"); + llvm_unreachable( + "Tried to produce DBG_VALUE for invalid or backup VarLoc"); } llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum"); } @@ -338,6 +326,27 @@ class LiveDebugValues : public MachineFunctionPass { /// Is the Loc field a constant or constant object? bool isConstant() const { return Kind == ImmediateKind; } + /// Check if the Loc field is an entry backup location. 
+ bool isEntryBackupLoc() const { + return Kind == EntryValueBackupKind || Kind == EntryValueCopyBackupKind; + } + + /// If this variable is described by a register holding the entry value, + /// return it, otherwise return 0. + unsigned getEntryValueBackupReg() const { + if (Kind == EntryValueBackupKind) + return Loc.RegNo; + return 0; + } + + /// If this variable is described by a register holding the copy of the + /// entry value, return it, otherwise return 0. + unsigned getEntryValueCopyBackupReg() const { + if (Kind == EntryValueCopyBackupKind) + return Loc.RegNo; + return 0; + } + /// If this variable is described by a register, return it, /// otherwise return 0. unsigned isDescribedByReg() const { @@ -357,6 +366,8 @@ class LiveDebugValues : public MachineFunctionPass { switch (Kind) { case RegisterKind: case EntryValueKind: + case EntryValueBackupKind: + case EntryValueCopyBackupKind: dbgs() << printReg(Loc.RegNo, TRI); break; case SpillLocKind: @@ -370,11 +381,17 @@ class LiveDebugValues : public MachineFunctionPass { llvm_unreachable("Invalid VarLoc in dump method"); } - dbgs() << ", \"" << Var.getVar()->getName() << "\", " << *Expr << ", "; + dbgs() << ", \"" << Var.getVariable()->getName() << "\", " << *Expr + << ", "; if (Var.getInlinedAt()) dbgs() << "!" << Var.getInlinedAt()->getMetadataID() << ")\n"; else - dbgs() << "(null))\n"; + dbgs() << "(null))"; + + if (isEntryBackupLoc()) + dbgs() << " (backup loc)\n"; + else + dbgs() << "\n"; } #endif @@ -390,7 +407,6 @@ class LiveDebugValues : public MachineFunctionPass { } }; - using DebugParamMap = SmallDenseMap; using VarLocMap = UniqueVector; using VarLocSet = SparseBitVector<>; using VarLocInMBB = SmallDenseMap; @@ -416,10 +432,18 @@ class LiveDebugValues : public MachineFunctionPass { /// This holds the working set of currently open ranges. For fast /// access, this is done both as a set of VarLocIDs, and a map of /// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all - /// previous open ranges for the same variable. + /// previous open ranges for the same variable. In addition, we keep + /// two different maps (Vars/EntryValuesBackupVars), so erase/insert + /// methods act differently depending on whether a VarLoc is primary + /// location or backup one. In the case the VarLoc is backup location + /// we will erase/insert from the EntryValuesBackupVars map, otherwise + /// we perform the operation on the Vars. class OpenRangesSet { VarLocSet VarLocs; + // Map the DebugVariable to recent primary location ID. SmallDenseMap Vars; + // Map the DebugVariable to recent backup location ID. + SmallDenseMap EntryValuesBackupVars; OverlapMap &OverlappingFragments; public: @@ -427,40 +451,38 @@ class LiveDebugValues : public MachineFunctionPass { const VarLocSet &getVarLocs() const { return VarLocs; } - /// Terminate all open ranges for Var by removing it from the set. - void erase(DebugVariable Var); + /// Terminate all open ranges for VL.Var by removing it from the set. + void erase(const VarLoc &VL); /// Terminate all open ranges listed in \c KillSet by removing /// them from the set. - void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs) { - VarLocs.intersectWithComplement(KillSet); - for (unsigned ID : KillSet) - Vars.erase(VarLocIDs[ID].Var); - } + void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs); /// Insert a new range into the set. 
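The dual bookkeeping that OpenRangesSet now performs can be modeled compactly; this sketch uses illustrative names and std containers, and only mirrors the backup-vs-primary dispatch described in the comments above:

```cpp
#include <map>
#include <set>
#include <string>

// Primary locations and entry-value backup locations live in separate
// maps; erase and insert pick the map based on whether the location is a
// backup, while the flat VarLocs ID set covers both.
struct OpenRanges {
  std::map<std::string, unsigned> Vars;                  // primary
  std::map<std::string, unsigned> EntryValuesBackupVars; // backup
  std::set<unsigned> VarLocs;

  void insert(unsigned ID, const std::string &Var, bool IsBackup) {
    VarLocs.insert(ID);
    (IsBackup ? EntryValuesBackupVars : Vars).emplace(Var, ID);
  }

  void erase(const std::string &Var, bool IsBackup) {
    auto &From = IsBackup ? EntryValuesBackupVars : Vars;
    auto It = From.find(Var);
    if (It == From.end())
      return;
    VarLocs.erase(It->second);
    From.erase(It);
  }
};
```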
- void insert(unsigned VarLocID, DebugVariable Var) { - VarLocs.set(VarLocID); - Vars.insert({Var, VarLocID}); - } + void insert(unsigned VarLocID, const VarLoc &VL); /// Insert a set of ranges. void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) { for (unsigned Id : ToLoad) { - const VarLoc &Var = Map[Id]; - insert(Id, Var.Var); + const VarLoc &VarL = Map[Id]; + insert(Id, VarL); } } + llvm::Optional getEntryValueBackup(DebugVariable Var); + /// Empty the set. void clear() { VarLocs.clear(); Vars.clear(); + EntryValuesBackupVars.clear(); } /// Return whether the set is empty or not. bool empty() const { - assert(Vars.empty() == VarLocs.empty() && "open ranges are inconsistent"); + assert(Vars.empty() == EntryValuesBackupVars.empty() && + Vars.empty() == VarLocs.empty() && + "open ranges are inconsistent"); return VarLocs.empty(); } }; @@ -502,21 +524,23 @@ class LiveDebugValues : public MachineFunctionPass { VarLocMap &VarLocIDs); void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers); + bool removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, const VarLoc &EntryVL); void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, - DebugParamMap &DebugEntryVals, SparseBitVector<> &KillSet); + void recordEntryValue(const MachineInstr &MI, + const DefinedRegsSet &DefinedRegs, + OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs); void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers); void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, TransferMap &Transfers, - DebugParamMap &DebugEntryVals); + VarLocMap &VarLocIDs, TransferMap &Transfers); bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); void process(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, TransferMap &Transfers, - DebugParamMap &DebugEntryVals); + VarLocMap &VarLocIDs, TransferMap &Transfers); void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, OverlapMap &OLapMap); @@ -559,46 +583,10 @@ class LiveDebugValues : public MachineFunctionPass { } // end anonymous namespace -namespace llvm { - -template <> struct DenseMapInfo { - using DV = LiveDebugValues::DebugVariable; - using OptFragmentInfo = LiveDebugValues::OptFragmentInfo; - using FragmentInfo = LiveDebugValues::FragmentInfo; - - // Empty key: no key should be generated that has no DILocalVariable. 
- static inline DV getEmptyKey() { - return DV(nullptr, OptFragmentInfo(), nullptr); - } - - // Difference in tombstone is that the Optional is meaningful - static inline DV getTombstoneKey() { - return DV(nullptr, OptFragmentInfo({0, 0}), nullptr); - } - - static unsigned getHashValue(const DV &D) { - unsigned HV = 0; - const OptFragmentInfo &Fragment = D.getFragment(); - if (Fragment) - HV = DenseMapInfo::getHashValue(*Fragment); - - return hash_combine(D.getVar(), HV, D.getInlinedAt()); - } - - static bool isEqual(const DV &A, const DV &B) { return A == B; } -}; - -} // namespace llvm - //===----------------------------------------------------------------------===// // Implementation //===----------------------------------------------------------------------===// -const DIExpression::FragmentInfo - LiveDebugValues::DebugVariable::DefaultFragment = { - std::numeric_limits::max(), - std::numeric_limits::min()}; - char LiveDebugValues::ID = 0; char &llvm::LiveDebugValuesID = LiveDebugValues::ID; @@ -619,38 +607,72 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const { } /// Erase a variable from the set of open ranges, and additionally erase any -/// fragments that may overlap it. +/// fragments that may overlap it. If the VarLoc is a backup location, erase +/// the variable from the EntryValuesBackupVars set, indicating we should stop +/// tracking its backup entry location. Otherwise, if the VarLoc is a primary +/// location, erase the variable from the Vars set. void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) { // Erasure helper. - auto DoErase = [this](DebugVariable VarToErase) { - auto It = Vars.find(VarToErase); - if (It != Vars.end()) { + auto DoErase = [VL, this](DebugVariable VarToErase) { + auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; + auto It = EraseFrom->find(VarToErase); + if (It != EraseFrom->end()) { unsigned ID = It->second; VarLocs.reset(ID); - Vars.erase(It); + EraseFrom->erase(It); } }; + DebugVariable Var = VL.Var; + // Erase the variable/fragment that ends here. DoErase(Var); // Extract the fragment. Interpret an empty fragment as one that covers all // possible bits. - FragmentInfo ThisFragment = Var.getFragmentDefault(); + FragmentInfo ThisFragment = Var.getFragmentOrDefault(); // There may be fragments that overlap the designated fragment. Look them up // in the pre-computed overlap map, and erase them too. - auto MapIt = OverlappingFragments.find({Var.getVar(), ThisFragment}); + auto MapIt = OverlappingFragments.find({Var.getVariable(), ThisFragment}); if (MapIt != OverlappingFragments.end()) { for (auto Fragment : MapIt->second) { LiveDebugValues::OptFragmentInfo FragmentHolder; - if (!DebugVariable::isFragmentDefault(Fragment)) + if (!DebugVariable::isDefaultFragment(Fragment)) FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment); - DoErase({Var.getVar(), FragmentHolder, Var.getInlinedAt()}); + DoErase({Var.getVariable(), FragmentHolder, Var.getInlinedAt()}); } } } +void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet, + const VarLocMap &VarLocIDs) { + VarLocs.intersectWithComplement(KillSet); + for (unsigned ID : KillSet) { + const VarLoc *VL = &VarLocIDs[ID]; + auto *EraseFrom = VL->isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; + EraseFrom->erase(VL->Var); + } +} + +void LiveDebugValues::OpenRangesSet::insert(unsigned VarLocID, + const VarLoc &VL) { + auto *InsertInto = VL.isEntryBackupLoc() ?
&EntryValuesBackupVars : &Vars; + VarLocs.set(VarLocID); + InsertInto->insert({VL.Var, VarLocID}); +} + +/// Return the Loc ID of an entry value backup location, if it exists for the +/// variable. +llvm::Optional +LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) { + auto It = EntryValuesBackupVars.find(Var); + if (It != EntryValuesBackupVars.end()) + return It->second; + + return llvm::None; +} + //===----------------------------------------------------------------------===// // Debug Range Extension Implementation //===----------------------------------------------------------------------===// @@ -669,7 +691,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, Out << "MBB: " << BB.getNumber() << ":\n"; for (unsigned VLL : L) { const VarLoc &VL = VarLocIDs[VLL]; - Out << " Var: " << VL.Var.getVar()->getName(); + Out << " Var: " << VL.Var.getVariable()->getName(); Out << " MI: "; VL.dump(TRI, Out); } @@ -693,6 +715,62 @@ LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) { return {Reg, Offset}; } +/// Try to salvage the debug entry value if we encounter a new debug value +/// describing the same parameter; otherwise stop tracking the value. Return +/// true if we should stop tracking the entry value and false otherwise. +bool LiveDebugValues::removeEntryValue(const MachineInstr &MI, + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, + const VarLoc &EntryVL) { + // Skip the DBG_VALUE which is the debug entry value itself. + if (MI.isIdenticalTo(EntryVL.MI)) + return false; + + // If the parameter's location is not a register location, we cannot track + // the entry value any more. In addition, if the debug expression from the + // DBG_VALUE is not empty, we can assume the parameter's value has changed, + // indicating that we should stop tracking its entry value as well. + if (!MI.getOperand(0).isReg() || + MI.getDebugExpression()->getNumElements() != 0) + return true; + + // If the DBG_VALUE comes from a copy instruction that copies the entry value, + // it means the parameter's value has not changed and we should be able to use + // its entry value. + bool TrySalvageEntryValue = false; + Register Reg = MI.getOperand(0).getReg(); + auto I = std::next(MI.getReverseIterator()); + const MachineOperand *SrcRegOp, *DestRegOp; + if (I != MI.getParent()->rend()) { + // TODO: Try to keep tracking an entry value if we encounter a propagated + // DBG_VALUE describing a copy of the entry value. (A propagated entry + // value does not indicate a parameter modification.) + auto DestSrc = TII->isCopyInstr(*I); + if (!DestSrc) + return true; + + SrcRegOp = DestSrc->Source; + DestRegOp = DestSrc->Destination; + if (Reg != DestRegOp->getReg()) + return true; + TrySalvageEntryValue = true; + } + + if (TrySalvageEntryValue) { + for (unsigned ID : OpenRanges.getVarLocs()) { + const VarLoc &VL = VarLocIDs[ID]; + if (!VL.isEntryBackupLoc()) + continue; + + if (VL.getEntryValueCopyBackupReg() == Reg && + VL.MI.getOperand(0).getReg() == SrcRegOp->getReg()) + return false; + } + } + + return true; +} + /// End all previous ranges related to @MI and start a new range from @MI /// if it is a DBG_VALUE instr. void LiveDebugValues::transferDebugValue(const MachineInstr &MI, @@ -707,18 +785,33 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, assert(Var->isValidLocationForIntrinsic(DebugLoc) && "Expected inlined-at fields to agree"); - // End all previous ranges of Var.
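The decision made by removeEntryValue above reduces to a small predicate. The following condensed model uses hypothetical stand-in flags rather than the pass's real MachineInstr queries, but follows the same branch order:

#include <cassert>

// Stand-in summary of the facts removeEntryValue() inspects.
struct DbgValue {
  bool IsTheEntryValueItself; // MI.isIdenticalTo(EntryVL.MI)
  bool LocIsRegister;         // MI.getOperand(0).isReg()
  bool ExprIsEmpty;           // no extra DIExpression elements
  bool FedByCopyOfEntryValue; // preceding COPY whose source still holds it
};

// Returns true when the backup entry value must be dropped.
bool shouldStopTrackingEntryValue(const DbgValue &DV) {
  if (DV.IsTheEntryValueItself)
    return false;                   // seeing the entry value again is fine
  if (!DV.LocIsRegister || !DV.ExprIsEmpty)
    return true;                    // the value has demonstrably changed
  return !DV.FedByCopyOfEntryValue; // a plain move keeps the backup alive
}

int main() {
  assert(!shouldStopTrackingEntryValue({true, true, true, false}));
  assert(shouldStopTrackingEntryValue({false, false, true, false}));
  assert(!shouldStopTrackingEntryValue({false, true, true, true}));
}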
DebugVariable V(Var, Expr, InlinedAt); - OpenRanges.erase(V); - // Add the VarLoc to OpenRanges from this DBG_VALUE. + // Check if this DBG_VALUE indicates a parameter's value changing. + // If that is the case, we should stop tracking its entry value. + auto EntryValBackupID = OpenRanges.getEntryValueBackup(V); + if (Var->isParameter() && EntryValBackupID) { + const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID]; + if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) { + LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: "; + MI.print(dbgs(), /*IsStandalone*/ false, + /*SkipOpers*/ false, /*SkipDebugLoc*/ false, + /*AddNewLine*/ true, TII)); + OpenRanges.erase(EntryVL); + } + } + unsigned ID; if (isDbgValueDescribedByReg(MI) || MI.getOperand(0).isImm() || MI.getOperand(0).isFPImm() || MI.getOperand(0).isCImm()) { // Use normal VarLoc constructor for registers and immediates. VarLoc VL(MI, LS); + // End all previous ranges of VL.Var. + OpenRanges.erase(VL); + ID = VarLocIDs.insert(VL); - OpenRanges.insert(ID, VL.Var); + // Add the VarLoc to OpenRanges from this DBG_VALUE. + OpenRanges.insert(ID, VL); } else if (MI.hasOneMemOperand()) { llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?"); } else { @@ -728,32 +821,30 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, } } +/// Turn the entry value backup locations into primary locations. void LiveDebugValues::emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, - DebugParamMap &DebugEntryVals, SparseBitVector<> &KillSet) { for (unsigned ID : KillSet) { - if (!VarLocIDs[ID].Var.getVar()->isParameter()) + if (!VarLocIDs[ID].Var.getVariable()->isParameter()) continue; - const MachineInstr *CurrDebugInstr = &VarLocIDs[ID].MI; + auto DebugVar = VarLocIDs[ID].Var; + auto EntryValBackupID = OpenRanges.getEntryValueBackup(DebugVar); - // If parameter's DBG_VALUE is not in the map that means we can't - // generate parameter's entry value. - if (!DebugEntryVals.count(CurrDebugInstr->getDebugVariable())) + // If the parameter has the entry value backup, it means we should + // be able to use its entry value. + if (!EntryValBackupID) continue; - auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()]; - DIExpression *NewExpr = DIExpression::prepend( - ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue); - - VarLoc EntryLoc = VarLoc::CreateEntryLoc(*ParamDebugInstr, LS, NewExpr); - - unsigned EntryValLocID = VarLocIDs.insert(EntryLoc); - Transfers.push_back({&MI, EntryValLocID}); - OpenRanges.insert(EntryValLocID, EntryLoc.Var); + const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID]; + VarLoc EntryLoc = + VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr, EntryVL.Loc.RegNo); + unsigned EntryValueID = VarLocIDs.insert(EntryLoc); + Transfers.push_back({&MI, EntryValueID}); + OpenRanges.insert(EntryValueID, EntryLoc); } } @@ -768,23 +859,21 @@ void LiveDebugValues::insertTransferDebugPair( unsigned NewReg) { const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI; - auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr, - &VarLocIDs](VarLoc &VL) { + auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &VarLocIDs](VarLoc &VL) { unsigned LocId = VarLocIDs.insert(VL); // Close this variable's previous location range. - DebugVariable V(*DebugInstr); - OpenRanges.erase(V); + OpenRanges.erase(VL); // Record the new location as an open range, and a postponed transfer // inserting a DBG_VALUE for this location. 
- OpenRanges.insert(LocId, VL.Var); + OpenRanges.insert(LocId, VL); TransferDebugPair MIP = {&MI, LocId}; Transfers.push_back(MIP); }; - // End all previous ranges of Var. - OpenRanges.erase(VarLocIDs[OldVarID].Var); + // End all previous ranges of VL.Var. + OpenRanges.erase(VarLocIDs[OldVarID]); switch (Kind) { case TransferKind::TransferCopy: { assert(NewReg && @@ -832,7 +921,7 @@ void LiveDebugValues::insertTransferDebugPair( /// A definition of a register may mark the end of a range. void LiveDebugValues::transferRegisterDef( MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, - TransferMap &Transfers, DebugParamMap &DebugEntryVals) { + TransferMap &Transfers) { MachineFunction *MF = MI.getMF(); const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); @@ -866,8 +955,7 @@ void LiveDebugValues::transferRegisterDef( if (auto *TPC = getAnalysisIfAvailable()) { auto &TM = TPC->getTM(); if (TM.Options.EnableDebugEntryValues) - emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals, - KillSet); + emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet); } } @@ -1005,12 +1093,12 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, if (TKind == TransferKind::TransferSpill && VarLocIDs[ID].isDescribedByReg() == Reg) { LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' - << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); + << VarLocIDs[ID].Var.getVariable()->getName() << ")\n"); } else if (TKind == TransferKind::TransferRestore && VarLocIDs[ID].Kind == VarLoc::SpillLocKind && VarLocIDs[ID].Loc.SpillLocation == *Loc) { LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '(' - << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); + << VarLocIDs[ID].Var.getVariable()->getName() << ")\n"); } else continue; insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, TKind, @@ -1026,14 +1114,14 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers) { - auto DestSrc = TII->isCopyInstr(MI); if (!DestSrc) return; const MachineOperand *DestRegOp = DestSrc->Destination; const MachineOperand *SrcRegOp = DestSrc->Source; - if (!SrcRegOp->isKill() || !DestRegOp->isDef()) + + if (!DestRegOp->isDef()) return; auto isCalleeSavedReg = [&](unsigned Reg) { @@ -1054,6 +1142,30 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, if (!isCalleeSavedReg(DestReg)) return; + // Remember an entry value movement. If we encounter a new debug value of + // a parameter describing only a move of the value, rather than a + // modification, we are still able to use the entry value if needed. + if (isRegOtherThanSPAndFP(*DestRegOp, MI, TRI)) { + for (unsigned ID : OpenRanges.getVarLocs()) { + if (VarLocIDs[ID].getEntryValueBackupReg() == SrcReg) { + LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump();); + VarLoc EntryValLocCopyBackup = VarLoc::CreateEntryCopyBackupLoc( + VarLocIDs[ID].MI, LS, VarLocIDs[ID].Expr, DestReg); + + // Stop tracking the original entry value. + OpenRanges.erase(VarLocIDs[ID]); + + // Start tracking the entry value copy.
+ unsigned EntryValCopyLocID = VarLocIDs.insert(EntryValLocCopyBackup); + OpenRanges.insert(EntryValCopyLocID, EntryValLocCopyBackup); + break; + } + } + } + + if (!SrcRegOp->isKill()) + return; + for (unsigned ID : OpenRanges.getVarLocs()) { if (VarLocIDs[ID].isDescribedByReg() == SrcReg) { insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, @@ -1099,26 +1211,27 @@ bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB, void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, OverlapMap &OverlappingFragments) { - DebugVariable MIVar(MI); - FragmentInfo ThisFragment = MIVar.getFragmentDefault(); + DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); + FragmentInfo ThisFragment = MIVar.getFragmentOrDefault(); // If this is the first sighting of this variable, then we are guaranteed // there are currently no overlapping fragments either. Initialize the set // of seen fragments, record no overlaps for the current one, and return. - auto SeenIt = SeenFragments.find(MIVar.getVar()); + auto SeenIt = SeenFragments.find(MIVar.getVariable()); if (SeenIt == SeenFragments.end()) { SmallSet OneFragment; OneFragment.insert(ThisFragment); - SeenFragments.insert({MIVar.getVar(), OneFragment}); + SeenFragments.insert({MIVar.getVariable(), OneFragment}); - OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}}); + OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}}); return; } // If this particular Variable/Fragment pair already exists in the overlap // map, it has already been accounted for. auto IsInOLapMap = - OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}}); + OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}}); if (!IsInOLapMap.second) return; @@ -1136,7 +1249,7 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, // Mark the previously seen fragment as being overlapped by the current // one. auto ASeenFragmentsOverlaps = - OverlappingFragments.find({MIVar.getVar(), ASeenFragment}); + OverlappingFragments.find({MIVar.getVariable(), ASeenFragment}); assert(ASeenFragmentsOverlaps != OverlappingFragments.end() && "Previously seen var fragment has no vector of overlaps"); ASeenFragmentsOverlaps->second.push_back(ThisFragment); @@ -1148,11 +1261,9 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, /// This routine creates OpenRanges. 
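The hand-off performed by CreateEntryCopyBackupLoc above can be modeled as retargeting the tracked backup register whenever a copy moves the entry value. A hypothetical sketch of that state transition, with plain integers standing in for physical registers:

#include <cassert>
#include <optional>

// Register currently believed to hold the (possibly copied) entry value.
struct Backup {
  unsigned Reg;
  bool IsCopy; // EntryValueBackupKind vs. EntryValueCopyBackupKind
};

std::optional<Backup> transferCopy(std::optional<Backup> B, unsigned Src,
                                   unsigned Dst) {
  if (B && B->Reg == Src)
    return Backup{Dst, /*IsCopy=*/true}; // retarget backup at the copy's dest
  return B;                              // unrelated copy: nothing changes
}

int main() {
  std::optional<Backup> B = Backup{/*Reg=*/5, /*IsCopy=*/false}; // at entry
  B = transferCopy(B, /*Src=*/5, /*Dst=*/11); // $r11 = COPY $r5
  assert(B && B->Reg == 11 && B->IsCopy);
  B = transferCopy(B, /*Src=*/7, /*Dst=*/8);  // unrelated copy
  assert(B && B->Reg == 11);
}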
void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, TransferMap &Transfers, - DebugParamMap &DebugEntryVals) { + VarLocMap &VarLocIDs, TransferMap &Transfers) { transferDebugValue(MI, OpenRanges, VarLocIDs); - transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers, - DebugEntryVals); + transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers); transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers); } @@ -1201,7 +1312,7 @@ bool LiveDebugValues::join( if (!InLocsT.empty()) { for (auto ID : InLocsT) dbgs() << " gathered candidate incoming var: " - << VarLocIDs[ID].Var.getVar()->getName() << "\n"; + << VarLocIDs[ID].Var.getVariable()->getName() << "\n"; } }); @@ -1216,7 +1327,7 @@ bool LiveDebugValues::join( if (!VarLocIDs[ID].dominates(MBB)) { KillSet.set(ID); LLVM_DEBUG({ - auto Name = VarLocIDs[ID].Var.getVar()->getName(); + auto Name = VarLocIDs[ID].Var.getVariable()->getName(); dbgs() << " killing " << Name << ", it doesn't dominate MBB\n"; }); } @@ -1273,6 +1384,8 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, // The ID location is live-in to MBB -- work out what kind of machine // location it is and create a DBG_VALUE. const VarLoc &DiffIt = VarLocIDs[ID]; + if (DiffIt.isEntryBackupLoc()) + continue; MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent()); MBB.insert(MBB.instr_begin(), MI); @@ -1284,8 +1397,7 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, bool LiveDebugValues::isEntryValueCandidate( const MachineInstr &MI, const DefinedRegsSet &DefinedRegs) const { - if (!MI.isDebugValue()) - return false; + assert(MI.isDebugValue() && "This must be DBG_VALUE."); // TODO: Add support for local variables that are expressed in terms of // parameters entry values. @@ -1332,6 +1444,37 @@ static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs, Regs.insert(*AI); } +/// This routine records the entry values of function parameters. The values +/// can be used as backup values. If we lose track of some unmodified +/// parameters, the backup values will be used as primary locations. +void LiveDebugValues::recordEntryValue(const MachineInstr &MI, + const DefinedRegsSet &DefinedRegs, + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs) { + if (auto *TPC = getAnalysisIfAvailable()) { + auto &TM = TPC->getTM(); + if (!TM.Options.EnableDebugEntryValues) + return; + } + + DebugVariable V(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); + + if (!isEntryValueCandidate(MI, DefinedRegs) || + OpenRanges.getEntryValueBackup(V)) + return; + + LLVM_DEBUG(dbgs() << "Creating the backup entry location: "; MI.dump();); + + // Create the entry value and use it as a backup location while it is + // valid; it stays valid until the parameter is modified. + DIExpression *NewExpr = + DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue); + VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, LS, NewExpr); + unsigned EntryValLocID = VarLocIDs.insert(EntryValLocAsBackup); + OpenRanges.insert(EntryValLocID, EntryValLocAsBackup); +} + /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks.
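End to end, the backup mechanism has three phases: remember the parameter's entry location, keep the backup while the parameter is unmodified, and promote it when the primary location is clobbered. A hypothetical sketch of that lifecycle, with a string standing in for the DW_OP_entry_value expression the pass actually builds via DIExpression::prepend:

#include <cassert>
#include <optional>
#include <string>

struct ParamState {
  std::optional<std::string> Backup; // e.g. "DW_OP_entry_value($r0)"
  std::string Primary;               // current primary location
};

void recordEntryValue(ParamState &S, const std::string &EntryReg) {
  if (!S.Backup) // only the first DBG_VALUE at function entry qualifies
    S.Backup = "DW_OP_entry_value(" + EntryReg + ")";
}

void clobberPrimary(ParamState &S) {
  // emitEntryValues: promote the backup, if any, to the primary location.
  S.Primary = S.Backup ? *S.Backup : "<optimized out>";
}

int main() {
  ParamState S{std::nullopt, "$r0"};
  recordEntryValue(S, "$r0");
  clobberPrimary(S); // e.g. a call clobbers $r0
  assert(S.Primary == "DW_OP_entry_value($r0)");
}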
bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { @@ -1368,23 +1511,17 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { std::greater> Pending; - // Working set of currently collected debug variables mapped to DBG_VALUEs - // representing candidates for production of debug entry values. - DebugParamMap DebugEntryVals; - // Set of register defines that are seen when traversing the entry block // looking for debug entry value candidates. DefinedRegsSet DefinedRegs; // Only in the case of entry MBB collect DBG_VALUEs representing // function parameters in order to generate debug entry values for them. - MachineBasicBlock &First_MBB = *(MF.begin()); for (auto &MI : First_MBB) { collectRegDefs(MI, DefinedRegs, TRI); - if (isEntryValueCandidate(MI, DefinedRegs) && - !DebugEntryVals.count(MI.getDebugVariable())) - DebugEntryVals[MI.getDebugVariable()] = &MI; + if (MI.isDebugValue()) + recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs); } // Initialize per-block structures and scan for fragment overlaps. @@ -1443,7 +1580,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { // First load any pending inlocs. OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs); for (auto &MI : *MBB) - process(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals); + process(MI, OpenRanges, VarLocIDs, Transfers); OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs); LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 600e7880c702b..9c80282bc59eb 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -1072,9 +1072,9 @@ class LiveIntervals::HMEditor { // Kill flags shouldn't be used while live intervals exist, they will be // reinserted by VirtRegRewriter. if (MachineInstr *KillMI = LIS.getInstructionFromIndex(OldIdxIn->end)) - for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO) - if (MO->isReg() && MO->isUse()) - MO->setIsKill(false); + for (MachineOperand &MOP : mi_bundle_ops(*KillMI)) + if (MOP.isReg() && MOP.isUse()) + MOP.setIsKill(false); // Is there a def before NewIdx which is not OldIdx? 
LiveRange::iterator Next = std::next(OldIdxIn); diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index 7a57cd6890d10..5ef907b883155 100644 --- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -49,10 +49,6 @@ static cl::opt cl::value_desc("N"), cl::desc("Function number to canonicalize.")); -static cl::opt CanonicalizeBasicBlockNumber( - "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), - cl::desc("BasicBlock number to canonicalize.")); - namespace { class MIRCanonicalizer : public MachineFunctionPass { @@ -374,24 +370,7 @@ static bool doDefKillClear(MachineBasicBlock *MBB) { } static bool runOnBasicBlock(MachineBasicBlock *MBB, - std::vector &bbNames, - unsigned &basicBlockNum, VRegRenamer &Renamer) { - - if (CanonicalizeBasicBlockNumber != ~0U) { - if (CanonicalizeBasicBlockNumber != basicBlockNum++) - return false; - LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() - << "\n";); - } - - if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { - LLVM_DEBUG({ - dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() - << "\n"; - }); - return false; - } - + unsigned BasicBlockNum, VRegRenamer &Renamer) { LLVM_DEBUG({ dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; dbgs() << "\n\n================================================\n\n"; @@ -399,7 +378,6 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, bool Changed = false; - bbNames.push_back(MBB->getName()); LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; @@ -412,8 +390,10 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, Changed |= rescheduleCanonically(IdempotentInstCount, MBB); LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); - Changed |= Renamer.renameVRegs(MBB); + Changed |= Renamer.renameVRegs(MBB, BasicBlockNum); + // TODO: Consider dropping this. Dropping kill defs is probably not + // semantically sound. 
Changed |= doDefKillClear(MBB); LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); @@ -445,16 +425,12 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() << "\n\n================================================\n\n";); - std::vector BBNames; - unsigned BBNum = 0; - bool Changed = false; - MachineRegisterInfo &MRI = MF.getRegInfo(); VRegRenamer Renamer(MRI); for (auto MBB : RPOList) - Changed |= runOnBasicBlock(MBB, BBNames, BBNum, Renamer); + Changed |= runOnBasicBlock(MBB, BBNum++, Renamer); return Changed; } diff --git a/llvm/lib/CodeGen/MIRNamerPass.cpp b/llvm/lib/CodeGen/MIRNamerPass.cpp index 62d0f2e52c7d2..9f61dd9ef243a 100644 --- a/llvm/lib/CodeGen/MIRNamerPass.cpp +++ b/llvm/lib/CodeGen/MIRNamerPass.cpp @@ -57,9 +57,10 @@ class MIRNamer : public MachineFunctionPass { VRegRenamer Renamer(MF.getRegInfo()); + unsigned BBIndex = 0; ReversePostOrderTraversal RPOT(&*MF.begin()); for (auto &MBB : RPOT) - Changed |= Renamer.renameVRegs(MBB); + Changed |= Renamer.renameVRegs(MBB, BBIndex++); return Changed; } diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.h b/llvm/lib/CodeGen/MIRVRegNamerUtils.h index ebe309757f27c..8e76bfa2bbd44 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.h +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.h @@ -84,7 +84,7 @@ class VRegRenamer { /// Same as the above, but sets a BBNum depending on BB traversal that /// will be used as prefix for the vreg names. - bool renameVRegs(MachineBasicBlock *MBB, unsigned BBNum = 0); + bool renameVRegs(MachineBasicBlock *MBB, unsigned BBNum); unsigned getCurrentBBNumber() const { return CurrentBBNumber; } }; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 71354ea43453e..f433c4b6c90b5 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1395,8 +1395,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, --N; - MachineOperandIteratorBase::PhysRegInfo Info = - ConstMIOperands(*I).analyzePhysReg(Reg, TRI); + PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI); // Register is live when we read it here. if (Info.Read) @@ -1434,8 +1433,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, --N; - MachineOperandIteratorBase::PhysRegInfo Info = - ConstMIOperands(*I).analyzePhysReg(Reg, TRI); + PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI); // Defs happen after uses so they take precedence if both are present. diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 34ece614185c3..6db388c2564a2 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -37,6 +37,15 @@ // ... // No clobber of %R0 // %R1 = COPY %R0 <<< Removed // +// or +// +// $R0 = OP ... +// ... // No read/clobber of $R0 and $R1 +// $R1 = COPY $R0 // $R0 is killed +// Replace $R0 with $R1 and remove the COPY +// $R1 = OP ... +// ... +// //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" @@ -98,6 +107,28 @@ class CopyTracker { } } + /// Remove a register from the copy maps. + void invalidateRegister(unsigned Reg, const TargetRegisterInfo &TRI) { + // Since Reg might be a subreg of some registers, invalidating Reg alone is + // not enough. We have to find the COPY that defines Reg, or the registers + // defined by Reg, and invalidate all of them.
+ DenseSet RegsToInvalidate{Reg}; + for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { + auto I = Copies.find(*RUI); + if (I != Copies.end()) { + if (MachineInstr *MI = I->second.MI) { + RegsToInvalidate.insert(MI->getOperand(0).getReg()); + RegsToInvalidate.insert(MI->getOperand(1).getReg()); + } + RegsToInvalidate.insert(I->second.DefRegs.begin(), + I->second.DefRegs.end()); + } + } + for (unsigned InvalidReg : RegsToInvalidate) + for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI) + Copies.erase(*RUI); + } + /// Clobber a single register, removing it from the tracker's copy maps. void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) { for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { @@ -151,6 +182,38 @@ class CopyTracker { return CI->second.MI; } + MachineInstr *findCopyDefViaUnit(unsigned RegUnit, + const TargetRegisterInfo &TRI) { + auto CI = Copies.find(RegUnit); + if (CI == Copies.end()) + return nullptr; + if (CI->second.DefRegs.size() != 1) + return nullptr; + MCRegUnitIterator RUI(CI->second.DefRegs[0], &TRI); + return findCopyForUnit(*RUI, TRI, true); + } + + MachineInstr *findAvailBackwardCopy(MachineInstr &I, unsigned Reg, + const TargetRegisterInfo &TRI) { + MCRegUnitIterator RUI(Reg, &TRI); + MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI); + if (!AvailCopy || + !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg)) + return nullptr; + + Register AvailSrc = AvailCopy->getOperand(1).getReg(); + Register AvailDef = AvailCopy->getOperand(0).getReg(); + for (const MachineInstr &MI : + make_range(AvailCopy->getReverseIterator(), I.getReverseIterator())) + for (const MachineOperand &MO : MI.operands()) + if (MO.isRegMask()) + // FIXME: Shall we simultaneously invalidate AvailSrc or AvailDef? + if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef)) + return nullptr; + + return AvailCopy; + } + MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg, const TargetRegisterInfo &TRI) { // We check the first RegUnit here, since we'll only be interested in the @@ -211,11 +274,16 @@ class MachineCopyPropagation : public MachineFunctionPass { void ClobberRegister(unsigned Reg); void ReadRegister(unsigned Reg, MachineInstr &Reader, DebugType DT); - void CopyPropagateBlock(MachineBasicBlock &MBB); + void ForwardCopyPropagateBlock(MachineBasicBlock &MBB); + void BackwardCopyPropagateBlock(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); void forwardUses(MachineInstr &MI); + void propagateDefs(MachineInstr &MI); bool isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx); + bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy, + const MachineInstr &UseI, + unsigned UseIdx); bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use); /// Candidates for deletion. @@ -313,6 +381,19 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, return true; } +bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy( + const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { + Register Def = Copy.getOperand(0).getReg(); + + if (const TargetRegisterClass *URC = + UseI.getRegClassConstraint(UseIdx, TII, TRI)) + return URC->contains(Def); + + // We don't process further if UseI is a COPY, since forward copy propagation + // should handle that. 
+ return false; +} + /// Decide whether we should forward the source of \param Copy to its use in /// \param UseI based on the physical register class constraints of the opcode /// and avoiding introducing more cross-class COPYs. @@ -468,8 +549,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { } } -void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { - LLVM_DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); +void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName() + << "\n"); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { MachineInstr *MI = &*I; @@ -647,6 +729,134 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { Tracker.clear(); } +static bool isBackwardPropagatableCopy(MachineInstr &MI, + const MachineRegisterInfo &MRI) { + assert(MI.isCopy() && "MI is expected to be a COPY"); + Register Def = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + + if (!Def || !Src) + return false; + + if (MRI.isReserved(Def) || MRI.isReserved(Src)) + return false; + + return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill(); +} + +void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { + if (!Tracker.hasAnyCopies()) + return; + + for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd; + ++OpIdx) { + MachineOperand &MODef = MI.getOperand(OpIdx); + + if (!MODef.isReg() || MODef.isUse()) + continue; + + // Ignore non-trivial cases. + if (MODef.isTied() || MODef.isUndef() || MODef.isImplicit()) + continue; + + if (!MODef.getReg()) + continue; + + // We only handle the case where the register comes from a vreg. + if (!MODef.isRenamable()) + continue; + + MachineInstr *Copy = + Tracker.findAvailBackwardCopy(MI, MODef.getReg(), *TRI); + if (!Copy) + continue; + + Register Def = Copy->getOperand(0).getReg(); + Register Src = Copy->getOperand(1).getReg(); + + if (MODef.getReg() != Src) + continue; + + if (!isBackwardPropagatableRegClassCopy(*Copy, MI, OpIdx)) + continue; + + if (hasImplicitOverlap(MI, MODef)) + continue; + + LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI) + << "\n with " << printReg(Def, TRI) << "\n in " + << MI << " from " << *Copy); + + MODef.setReg(Def); + MODef.setIsRenamable(Copy->getOperand(0).isRenamable()); + + LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n"); + MaybeDeadCopies.insert(Copy); + Changed = true; + } +} + +void MachineCopyPropagation::BackwardCopyPropagateBlock( + MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName() + << "\n"); + + for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); + I != E;) { + MachineInstr *MI = &*I; + ++I; + + // Ignore non-trivial COPYs. + if (MI->isCopy() && MI->getNumOperands() == 2 && + !TRI->regsOverlap(MI->getOperand(0).getReg(), + MI->getOperand(1).getReg())) { + + Register Def = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); + + // Unlike forward cp, we don't invoke propagateDefs here; + // we just let forward cp do COPY-to-COPY propagation. + if (isBackwardPropagatableCopy(*MI, *MRI)) { + Tracker.invalidateRegister(Src, *TRI); + Tracker.invalidateRegister(Def, *TRI); + Tracker.trackCopy(MI, *TRI); + continue; + } + } + + // Invalidate any earlyclobber regs first.
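The guard used by the backward walk above is small enough to state on its own. A toy version with stand-in fields rather than MachineOperand queries: the COPY's source must be renamable and killed at the COPY, and neither side may be reserved.

#include <cassert>

struct CopyInfo {
  bool SrcRenamable;
  bool SrcKilled;
  bool DefReserved;
  bool SrcReserved;
};

bool isBackwardPropagatable(const CopyInfo &C) {
  if (C.DefReserved || C.SrcReserved)
    return false;
  return C.SrcRenamable && C.SrcKilled;
}

int main() {
  // $r1 = COPY killed renamable $r0  -->  rewrite the def of $r0 to $r1.
  assert(isBackwardPropagatable({true, true, false, false}));
  // Without the kill flag the value is still needed in $r0; keep the COPY.
  assert(!isBackwardPropagatable({true, false, false, false}));
}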
+ for (const MachineOperand &MO : MI->operands()) + if (MO.isReg() && MO.isEarlyClobber()) { + Register Reg = MO.getReg(); + if (!Reg) + continue; + Tracker.invalidateRegister(Reg, *TRI); + } + + propagateDefs(*MI); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + + if (!MO.getReg()) + continue; + + if (MO.isDef()) + Tracker.invalidateRegister(MO.getReg(), *TRI); + + if (MO.readsReg()) + Tracker.invalidateRegister(MO.getReg(), *TRI); + } + } + + for (auto *Copy : MaybeDeadCopies) + Copy->eraseFromParent(); + + MaybeDeadCopies.clear(); + CopyDbgUsers.clear(); + Tracker.clear(); +} + bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -657,8 +867,10 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); MRI = &MF.getRegInfo(); - for (MachineBasicBlock &MBB : MF) - CopyPropagateBlock(MBB); + for (MachineBasicBlock &MBB : MF) { + BackwardCopyPropagateBlock(MBB); + ForwardCopyPropagateBlock(MBB); + } return Changed; } diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp index 18df5c69a22d9..94865b0e9031c 100644 --- a/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -278,22 +278,18 @@ bool llvm::finalizeBundles(MachineFunction &MF) { return Changed; } -//===----------------------------------------------------------------------===// -// MachineOperand iterator -//===----------------------------------------------------------------------===// - -MachineOperandIteratorBase::VirtRegInfo -MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, - SmallVectorImpl > *Ops) { - VirtRegInfo RI = { false, false, false }; - for(; isValid(); ++*this) { - MachineOperand &MO = deref(); +VirtRegInfo llvm::AnalyzeVirtRegInBundle( + MachineInstr &MI, unsigned Reg, + SmallVectorImpl> *Ops) { + VirtRegInfo RI = {false, false, false}; + for (MIBundleOperands O(MI); O.isValid(); ++O) { + MachineOperand &MO = *O; if (!MO.isReg() || MO.getReg() != Reg) continue; // Remember each (MI, OpNo) that refers to Reg. if (Ops) - Ops->push_back(std::make_pair(MO.getParent(), getOperandNo())); + Ops->push_back(std::make_pair(MO.getParent(), O.getOperandNo())); // Both defs and uses can read virtual registers. if (MO.readsReg()) { @@ -305,22 +301,22 @@ MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, // Only defs can write. 
if (MO.isDef()) RI.Writes = true; - else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo())) + else if (!RI.Tied && + MO.getParent()->isRegTiedToDefOperand(O.getOperandNo())) RI.Tied = true; } return RI; } -MachineOperandIteratorBase::PhysRegInfo -MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, - const TargetRegisterInfo *TRI) { +PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, unsigned Reg, + const TargetRegisterInfo *TRI) { bool AllDefsDead = true; PhysRegInfo PRI = {false, false, false, false, false, false, false, false}; assert(Register::isPhysicalRegister(Reg) && "analyzePhysReg not given a physical register!"); - for (; isValid(); ++*this) { - MachineOperand &MO = deref(); + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + const MachineOperand &MO = *O; if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) { PRI.Clobbered = true; diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp index 0c2ef3321e0a8..aff67f9cfd55f 100644 --- a/llvm/lib/CodeGen/MachineSizeOpts.cpp +++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp @@ -107,14 +107,16 @@ struct MachineBasicBlockBFIAdapter { bool llvm::shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *MBFI) { + const MachineBlockFrequencyInfo *MBFI, + PGSOQueryType QueryType) { return shouldFuncOptimizeForSizeImpl( - MF, PSI, MBFI); + MF, PSI, MBFI, QueryType); } bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *MBFI) { + const MachineBlockFrequencyInfo *MBFI, + PGSOQueryType QueryType) { return shouldOptimizeForSizeImpl( - MBB, PSI, MBFI); + MBB, PSI, MBFI, QueryType); } diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 6d88aae70af39..ca57e51268e88 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -124,8 +124,8 @@ namespace { void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); if (Register::isPhysicalRegister(Reg)) - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - RV.push_back(*SubRegs); + for (const MCPhysReg &SubReg : TRI->subregs(Reg)) + RV.push_back(SubReg); } struct BBInfo { @@ -802,18 +802,16 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB live-in list contains non-physical register", MBB); continue; } - for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - regsLive.insert(*SubRegs); + for (const MCPhysReg &SubReg : TRI->subregs_inclusive(LI.PhysReg)) + regsLive.insert(SubReg); } } const MachineFrameInfo &MFI = MF->getFrameInfo(); BitVector PR = MFI.getPristineRegs(*MF); for (unsigned I : PR.set_bits()) { - for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - regsLive.insert(*SubRegs); + for (const MCPhysReg &SubReg : TRI->subregs_inclusive(I)) + regsLive.insert(SubReg); } regsKilled.clear(); @@ -1610,13 +1608,23 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } else if (MONum < MCID.getNumOperands()) { const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; // Don't check if it's the last operand in a variadic instruction. See, - // e.g., LDM_RET in the arm back end. 
- if (MO->isReg() && - !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { - if (MO->isDef() && !MCOI.isOptionalDef()) - report("Explicit operand marked as def", MO, MONum); - if (MO->isImplicit()) - report("Explicit operand marked as implicit", MO, MONum); + // e.g., LDM_RET in the arm back end. Check non-variadic operands only. + bool IsOptional = MI->isVariadic() && MONum == MCID.getNumOperands() - 1; + if (!IsOptional) { + if (MO->isReg()) { + if (MO->isDef() && !MCOI.isOptionalDef()) + report("Explicit operand marked as def", MO, MONum); + if (MO->isImplicit()) + report("Explicit operand marked as implicit", MO, MONum); + } + + // Check that an instruction has register operands only as expected. + if (MCOI.OperandType == MCOI::OPERAND_REGISTER && + !MO->isReg() && !MO->isFI()) + report("Expected a register operand.", MO, MONum); + if ((MCOI.OperandType == MCOI::OPERAND_IMMEDIATE || + MCOI.OperandType == MCOI::OPERAND_PCREL) && MO->isReg()) + report("Expected a non-register operand.", MO, MONum); } int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO); @@ -2006,9 +2014,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { bool Bad = !isReserved(Reg); // We are fine if just any subregister has a defined value. if (Bad) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); - ++SubRegs) { - if (regsLive.count(*SubRegs)) { + + for (const MCPhysReg &SubReg : TRI->subregs(Reg)) { + if (regsLive.count(SubReg)) { Bad = false; break; } @@ -2026,9 +2034,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (!Register::isPhysicalRegister(MOP.getReg())) continue; - for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid(); - ++SubRegs) { - if (*SubRegs == Reg) { + for (const MCPhysReg &SubReg : TRI->subregs(MOP.getReg())) { + if (SubReg == Reg) { Bad = false; break; } diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp index d21eae222af03..26cbc14166be4 100644 --- a/llvm/lib/CodeGen/MacroFusion.cpp +++ b/llvm/lib/CodeGen/MacroFusion.cpp @@ -36,6 +36,21 @@ static bool isHazard(const SDep &Dep) { return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output; } +static SUnit *getPredClusterSU(const SUnit &SU) { + for (const SDep &SI : SU.Preds) + if (SI.isCluster()) + return SI.getSUnit(); + + return nullptr; +} + +static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) { + unsigned Num = 1; + const SUnit *CurrentSU = &SU; + while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num ++; + return Num < FuseLimit; +} + static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU) { // Check that neither instr is already paired with another along the edge @@ -161,8 +176,10 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) if (DepSU.isBoundaryNode()) continue; + // Only chain two instructions together at most. 
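The fusion-limit walk introduced in MacroFusion above follows cluster predecessor edges and refuses to grow a chain past the limit. A self-contained sketch on a hypothetical node type (the real code walks SUnit::Preds for SDep::Cluster edges):

#include <cassert>

struct Node {
  Node *ClusterPred = nullptr; // the unique cluster-edge predecessor, if any
};

bool hasLessThanNumFused(const Node &N, unsigned FuseLimit) {
  unsigned Num = 1;
  const Node *Cur = &N;
  while ((Cur = Cur->ClusterPred) && Num < FuseLimit)
    ++Num;
  return Num < FuseLimit;
}

int main() {
  Node A, B, C;
  B.ClusterPred = &A; // A<-B already fused
  C.ClusterPred = &B; // A<-B<-C would be a chain of three
  assert(hasLessThanNumFused(A, 2));  // A can still accept a partner
  assert(!hasLessThanNumFused(B, 2)); // B is already in a pair
  assert(!hasLessThanNumFused(C, 2));
}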
const MachineInstr *DepMI = DepSU.getInstr(); - if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) + if (!hasLessThanNumFused(DepSU, 2) || + !shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) continue; if (fuseInstructionPair(DAG, DepSU, AnchorSU)) diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 2850033e64196..ad7f910be4c52 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -189,7 +190,85 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { return LatestDef; } +MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, int PhysReg) { + return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg)); +} + +bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B, + int PhysReg) { + MachineBasicBlock *ParentA = A->getParent(); + MachineBasicBlock *ParentB = B->getParent(); + if (ParentA != ParentB) + return false; + + return getReachingDef(A, PhysReg) == getReachingDef(B, PhysReg); +} + +MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, + int InstId) { + assert(static_cast(MBB->getNumber()) < MBBReachingDefs.size() && + "Unexpected basic block number."); + assert(InstId < static_cast(MBB->size()) && + "Unexpected instruction id."); + + if (InstId < 0) + return nullptr; + + for (auto &MI : *MBB) { + if (InstIds.count(&MI) && InstIds[&MI] == InstId) + return &MI; + } + return nullptr; +} + int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) { assert(InstIds.count(MI) && "Unexpected machine instuction."); return InstIds[MI] - getReachingDef(MI, PhysReg); } + +void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg, + SmallVectorImpl &Uses) { + MachineBasicBlock *MBB = Def->getParent(); + MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def); + while (++MI != MBB->end()) { + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.isUse() || MO.getReg() != PhysReg) + continue; + + // If/when we find a new reaching def, we know that there's no more uses + // of 'Def'. + if (getReachingMIDef(&*MI, PhysReg) != Def) + return; + + Uses.push_back(&*MI); + if (MO.isKill()) + return; + } + } +} + +unsigned ReachingDefAnalysis::getNumUses(MachineInstr *Def, int PhysReg) { + SmallVector Uses; + getReachingLocalUses(Def, PhysReg, Uses); + return Uses.size(); +} + +bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) { + MachineBasicBlock *MBB = MI->getParent(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + + // Yes if the register is live out of the basic block. + if (LiveRegs.contains(PhysReg)) + return true; + + // Walk backwards through the block to see if the register is live at some + // point. 
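The new getReachingLocalUses helper above collects users of a def until a new reaching def ends the search. A simplified model, with instruction operands reduced to flags (the real code compares reaching defs rather than testing a DefsReg bit):

#include <cassert>
#include <vector>

struct Inst {
  bool UsesReg = false;
  bool DefsReg = false;
  bool KillsReg = false;
};

std::vector<const Inst *> reachingLocalUses(const std::vector<Inst> &Block,
                                            size_t DefIdx) {
  std::vector<const Inst *> Uses;
  for (size_t I = DefIdx + 1; I < Block.size(); ++I) {
    if (Block[I].DefsReg)
      break; // a new reaching def ends the search
    if (Block[I].UsesReg) {
      Uses.push_back(&Block[I]);
      if (Block[I].KillsReg)
        break; // nothing can use the register after a kill
    }
  }
  return Uses;
}

int main() {
  std::vector<Inst> B = {{false, true, false},  // def
                         {true, false, false},  // use
                         {true, false, true},   // use + kill
                         {true, false, false}}; // not reached by this def
  assert(reachingLocalUses(B, 0).size() == 2);
}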
+ for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) { + LiveRegs.stepBackward(*Last); + if (LiveRegs.contains(PhysReg)) + return InstIds[&*Last] > InstIds[MI]; + } + return false; +} + diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp index 270209293f6a4..a5bea1463468a 100644 --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -221,8 +221,8 @@ void RegScavenger::forward() { // Ideally we would like a way to model this, but leaving the // insert_subreg around causes both correctness and performance issues. bool SubUsed = false; - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - if (isRegUsed(*SubRegs)) { + for (const MCPhysReg &SubReg : TRI->subregs(Reg)) + if (isRegUsed(SubReg)) { SubUsed = true; break; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 793352c16d35a..2c2f8fea97900 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -131,6 +131,7 @@ namespace { const TargetLowering &TLI; CombineLevel Level; CodeGenOpt::Level OptLevel; + bool LegalDAG = false; bool LegalOperations = false; bool LegalTypes = false; bool ForCodeSize; @@ -179,6 +180,12 @@ namespace { AddToWorklist(Node); } + /// Convenient shorthand to add a node and all of its user to the worklist. + void AddToWorklistWithUsers(SDNode *N) { + AddUsersToWorklist(N); + AddToWorklist(N); + } + // Prune potentially dangling nodes. This is called after // any visit to a node, but should also be called during a visit after any // failed combine which may have created a DAG node. @@ -1395,6 +1402,7 @@ bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. Level = AtLevel; + LegalDAG = Level >= AfterLegalizeDAG; LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; @@ -1421,14 +1429,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // If this combine is running after legalizing the DAG, re-legalize any // nodes pulled off the worklist. - if (Level == AfterLegalizeDAG) { + if (LegalDAG) { SmallSetVector UpdatedNodes; bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); - for (SDNode *LN : UpdatedNodes) { - AddUsersToWorklist(LN); - AddToWorklist(LN); - } + for (SDNode *LN : UpdatedNodes) + AddToWorklistWithUsers(LN); + if (!NIsValid) continue; } @@ -5332,7 +5339,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - if (Level >= AfterLegalizeTypes) { + if (LegalTypes) { // Attempt to propagate the AND back up to the leaves which, if they're // loads, can be combined to narrow loads and the AND node can be removed. // Perform after legalization so that extend nodes will already be @@ -8724,6 +8731,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -8748,6 +8759,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); + // Try transforming N to an indexed load. 
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -9506,11 +9521,10 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, SDLoc dl(Ld); SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); - SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), - Ld->getBasePtr(), Ld->getMask(), - PassThru, Ld->getMemoryVT(), - Ld->getMemOperand(), ExtLoadType, - Ld->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(), + ExtLoadType, Ld->isExpandingLoad()); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); return NewLoad; } @@ -13357,9 +13371,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (CFP1) { APFloat CVal = CFP1->getValueAPF(); CVal.changeSign(); - if (Level >= AfterLegalizeDAG && - (TLI.isFPImmLegal(CVal, VT, ForCodeSize) || - TLI.isOperationLegal(ISD::ConstantFP, VT))) + if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) || + TLI.isOperationLegal(ISD::ConstantFP, VT))) return DAG.getNode( ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), @@ -13612,12 +13625,22 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, EVT VT; unsigned AS; - if (LoadSDNode *LD = dyn_cast(Use)) { + if (LoadSDNode *LD = dyn_cast(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); + } else if (StoreSDNode *ST = dyn_cast(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); + } else if (MaskedLoadSDNode *LD = dyn_cast(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; VT = LD->getMemoryVT(); AS = LD->getAddressSpace(); - } else if (StoreSDNode *ST = dyn_cast(Use)) { + } else if (MaskedStoreSDNode *ST = dyn_cast(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; VT = ST->getMemoryVT(); @@ -13651,38 +13674,64 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, VT.getTypeForEVT(*DAG.getContext()), AS); } -/// Try turning a load/store into a pre-indexed load/store when the base -/// pointer is an add or subtract and it has other uses besides the load/store. -/// After the transformation, the new indexed load/store has effectively folded -/// the add/subtract in and all of its other uses are redirected to the -/// new load/store. 
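The getCombineLoadStoreParts factoring that follows funnels four node kinds (load, store, masked load, masked store) through one legality query. A schematic dispatch with hypothetical enums, not the SelectionDAG types:

#include <cassert>

enum class NodeKind { Load, Store, MaskedLoad, MaskedStore };

struct Target {
  bool IndexedLoadLegal, IndexedStoreLegal;
  bool IndexedMaskedLoadLegal, IndexedMaskedStoreLegal;
};

bool canCombineIndexed(NodeKind K, const Target &T, bool &IsLoad,
                       bool &IsMasked) {
  IsLoad = (K == NodeKind::Load || K == NodeKind::MaskedLoad);
  IsMasked = (K == NodeKind::MaskedLoad || K == NodeKind::MaskedStore);
  if (IsMasked)
    return IsLoad ? T.IndexedMaskedLoadLegal : T.IndexedMaskedStoreLegal;
  return IsLoad ? T.IndexedLoadLegal : T.IndexedStoreLegal;
}

int main() {
  Target T{true, true, true, false};
  bool IsLoad, IsMasked;
  assert(canCombineIndexed(NodeKind::MaskedLoad, T, IsLoad, IsMasked));
  assert(IsLoad && IsMasked);
  assert(!canCombineIndexed(NodeKind::MaskedStore, T, IsLoad, IsMasked));
}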
-bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (Level < AfterLegalizeDAG) - return false; - - bool isLoad = true; - SDValue Ptr; - EVT VT; - if (LoadSDNode *LD = dyn_cast(N)) { +static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, + bool &IsLoad, bool &IsMasked, SDValue &Ptr, + const TargetLowering &TLI) { + if (LoadSDNode *LD = dyn_cast(N)) { if (LD->isIndexed()) return false; - VT = LD->getMemoryVT(); - if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT)) return false; Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast(N)) { + } else if (StoreSDNode *ST = dyn_cast(N)) { if (ST->isIndexed()) return false; - VT = ST->getMemoryVT(); - if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT)) + return false; + Ptr = ST->getBasePtr(); + IsLoad = false; + } else if (MaskedLoadSDNode *LD = dyn_cast(N)) { + if (LD->isIndexed()) + return false; + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) && + !TLI.isIndexedMaskedLoadLegal(Dec, VT)) + return false; + Ptr = LD->getBasePtr(); + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast(N)) { + if (ST->isIndexed()) + return false; + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) && + !TLI.isIndexedMaskedStoreLegal(Dec, VT)) return false; Ptr = ST->getBasePtr(); - isLoad = false; + IsLoad = false; + IsMasked = true; } else { return false; } + return true; +} + +/// Try turning a load/store into a pre-indexed load/store when the base +/// pointer is an add or subtract and it has other uses besides the load/store. +/// After the transformation, the new indexed load/store has effectively folded +/// the add/subtract in and all of its other uses are redirected to the +/// new load/store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + bool IsLoad = true; + bool IsMasked = false; + SDValue Ptr; + if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked, + Ptr, TLI)) + return false; // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail // out. There is no reason to make this a preinc/predec. @@ -13724,8 +13773,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; // Check #2. - if (!isLoad) { - SDValue Val = cast(N)->getValue(); + if (!IsLoad) { + SDValue Val = IsMasked ? cast(N)->getValue() + : cast(N)->getValue(); // Would require a copy. 
    if (Val == BasePtr)
      return false;
@@ -13801,18 +13851,26 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
     return false;

   SDValue Result;
-  if (isLoad)
-    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
-                                BasePtr, Offset, AM);
-  else
-    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
-                                 BasePtr, Offset, AM);
+  if (!IsMasked) {
+    if (IsLoad)
+      Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
+    else
+      Result =
+          DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
+  } else {
+    if (IsLoad)
+      Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+                                        Offset, AM);
+    else
+      Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
+                                         Offset, AM);
+  }
   ++PreIndexedNodes;
   ++NodesCombined;
   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG); dbgs() << '\n');
   WorklistRemover DeadNodes(*this);
-  if (isLoad) {
+  if (IsLoad) {
     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
   } else {
@@ -13866,7 +13924,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {

   // We can now generate the new expression.
   SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
-  SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
+  SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);

   SDValue NewUse = DAG.getNode(Opcode,
                                DL,
@@ -13876,7 +13934,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
   }

   // Replace the uses of Ptr with uses of the updated base value.
-  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
+  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
   deleteAndRecombine(Ptr.getNode());
   AddToWorklist(Result.getNode());

@@ -13891,29 +13949,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
   if (Level < AfterLegalizeDAG)
     return false;

-  bool isLoad = true;
+  bool IsLoad = true;
+  bool IsMasked = false;
   SDValue Ptr;
-  EVT VT;
-  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
-    if (LD->isIndexed())
-      return false;
-    VT = LD->getMemoryVT();
-    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
-        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
-      return false;
-    Ptr = LD->getBasePtr();
-  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
-    if (ST->isIndexed())
-      return false;
-    VT = ST->getMemoryVT();
-    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
-        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
-      return false;
-    Ptr = ST->getBasePtr();
-    isLoad = false;
-  } else {
+  if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked,
+                                Ptr, TLI))
     return false;
-  }

   if (Ptr.getNode()->hasOneUse())
     return false;
@@ -13949,7 +13990,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {

       // If all the uses are load / store addresses, then don't do the
       // transformation.
-      if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+      if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
        bool RealUse = false;
        for (SDNode *UseUse : Use->uses()) {
          if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
@@ -13975,18 +14016,24 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
        Worklist.push_back(Op);
      if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
          !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
-        SDValue Result = isLoad
-            ?
 DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
-                                 BasePtr, Offset, AM)
-            : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
-                                  BasePtr, Offset, AM);
+        SDValue Result;
+        if (!IsMasked)
+          Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+                                               Offset, AM)
+                          : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
+                                                BasePtr, Offset, AM);
+        else
+          Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
+                                                     BasePtr, Offset, AM)
+                          : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
+                                                      BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
-        if (isLoad) {
+        if (IsLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
@@ -13998,7 +14045,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
-                                      Result.getValue(isLoad ? 1 : 0));
+                                      Result.getValue(IsLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
@@ -16655,11 +16702,15 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {

 /// Convert a disguised subvector insertion into a shuffle:
 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
+         "Expected insert_vector_elt");
   SDValue InsertVal = N->getOperand(1);
   SDValue Vec = N->getOperand(0);

-  // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
-  // --> (vector_shuffle X, Y)
+  // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
+  // InsIndex)
+  //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
+  //   CONCAT_VECTORS.
   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
@@ -16672,18 +16723,47 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
     // Vec's operand 0 is using indices from 0 to N-1 and
     // operand 1 from N to 2N - 1, where N is the number of
     // elements in the vectors.
-    int XOffset = -1;
-    if (InsertVal.getOperand(0) == X) {
-      XOffset = 0;
-    } else if (InsertVal.getOperand(0) == Y) {
-      XOffset = X.getValueType().getVectorNumElements();
+    SDValue InsertVal0 = InsertVal.getOperand(0);
+    int ElementOffset = -1;
+
+    // We explore the inputs of the shuffle in order to see if we find the
+    // source of the extract_vector_elt. If so, we can use it to modify the
+    // shuffle rather than perform an insert_vector_elt.
+    SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
+    ArgWorkList.emplace_back(Mask.size(), Y);
+    ArgWorkList.emplace_back(0, X);
+
+    while (!ArgWorkList.empty()) {
+      int ArgOffset;
+      SDValue ArgVal;
+      std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
+
+      if (ArgVal == InsertVal0) {
+        ElementOffset = ArgOffset;
+        break;
+      }
+
+      // Peek through concat_vectors.
+      if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
+        int CurrentArgOffset =
+            ArgOffset + ArgVal.getValueType().getVectorNumElements();
+        int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
+        for (SDValue Op : reverse(ArgVal->ops())) {
+          CurrentArgOffset -= Step;
+          ArgWorkList.emplace_back(CurrentArgOffset, Op);
+        }
+
+        // Make sure we went through all the elements and did not screw up
+        // index computation.
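+        // (Each concat operand advances the offset by Step elements, so after
+        // walking all operands in reverse we must land back on the offset we
+        // started from.)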
+        assert(CurrentArgOffset == ArgOffset);
+      }
     }

-    if (XOffset != -1) {
+    if (ElementOffset != -1) {
       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());

       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
-      NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
+      NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
       assert(NewMask[InsIndex] <
                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
@@ -16915,8 +16995,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
   AddToWorklist(EVE);
   // Since we're explicitly calling ReplaceAllUses, add the new node to the
   // worklist explicitly as well.
-  AddUsersToWorklist(Load.getNode()); // Add users too
-  AddToWorklist(Load.getNode());
+  AddToWorklistWithUsers(Load.getNode());
   ++OpsNarrowed;
   return SDValue(EVE, 0);
 }
@@ -20436,7 +20515,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
 ///     Result = N X_i + X_i (N - N A X_i)
 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
                                       SDNodeFlags Flags) {
-  if (Level >= AfterLegalizeDAG)
+  if (LegalDAG)
     return SDValue();

   // TODO: Handle half and/or extended types?
@@ -20575,7 +20654,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
 /// Op can be zero.
 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                            bool Reciprocal) {
-  if (Level >= AfterLegalizeDAG)
+  if (LegalDAG)
     return SDValue();

   // TODO: Handle half and/or extended types?
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index f55c81b2f3d08..0d8a547a92561 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -410,8 +410,8 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
   else if (isa<ConstantPointerNull>(V))
     // Translate this as an integer zero so that it can be
     // local-CSE'd with actual integer zeros.
-    Reg = getRegForValue(
-        Constant::getNullValue(DL.getIntPtrType(V->getContext())));
+    Reg =
+        getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getType())));
   else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
     if (CF->isNullValue())
       Reg = fastMaterializeFloatZero(CF);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 17bb98bdddfb5..70cb20e48d20f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1023,8 +1023,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     // These pseudo-ops are the same as the other STRICT_ ops except
     // they are registered with setOperationAction() using the input type
     // instead of the output type.
-    Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
-                                            Node->getOperand(1).getValueType());
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(1).getValueType());
     break;
   case ISD::SIGN_EXTEND_INREG: {
     EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
@@ -3692,7 +3692,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     }
     SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars);
-    ReplaceNode(SDValue(Node, 0), Result);
+    Results.push_back(Result);
     break;
   }
   case ISD::VECREDUCE_FADD:
@@ -3720,7 +3720,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
   case ISD::INTRINSIC_WO_CHAIN:
   case ISD::INTRINSIC_VOID:
     // FIXME: Custom lowering for these operations shouldn't return null!
-    break;
+    // Return true so that we don't call ConvertNodeToLibcall which also won't
+    // do anything.
+ return true; } if (!TLI.isStrictFPEnabled() && Results.empty() && Node->isStrictFPOpcode()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index af963bc028026..a94efe74c9abe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -65,35 +65,60 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::STRICT_FMINNUM: case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; + case ISD::STRICT_FMAXNUM: case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; + case ISD::STRICT_FADD: case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break; + case ISD::STRICT_FCEIL: case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::STRICT_FCOS: case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::STRICT_FDIV: case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::STRICT_FEXP: case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::STRICT_FEXP2: case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::STRICT_FFLOOR: case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::STRICT_FLOG: case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::STRICT_FLOG2: case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::STRICT_FLOG10: case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::STRICT_FMA: case ISD::FMA: R = SoftenFloatRes_FMA(N); break; + case ISD::STRICT_FMUL: case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; + case ISD::STRICT_FPOW: case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::STRICT_FPOWI: case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::STRICT_FREM: case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::STRICT_FRINT: case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::STRICT_FROUND: case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; + case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::STRICT_FSQRT: case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::STRICT_FSUB: case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::STRICT_FTRUNC: case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; @@ -112,6 +137,46 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } } +SDValue DAGTypeLegalizer::SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC) { + bool IsStrict = N->isStrictFPOpcode(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getNumOperands() == (1 + Offset) && + "Unexpected number of operands!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset)); + SDValue Chain = IsStrict ? 
 N->getOperand(0) : SDValue();
+  TargetLowering::MakeLibCallOptions CallOptions;
+  EVT OpVT = N->getOperand(0 + Offset).getValueType();
+  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+                                                    CallOptions, SDLoc(N),
+                                                    Chain);
+  if (IsStrict)
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+  return Tmp.first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) {
+  bool IsStrict = N->isStrictFPOpcode();
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned Offset = IsStrict ? 1 : 0;
+  assert(N->getNumOperands() == (2 + Offset) &&
+         "Unexpected number of operands!");
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+                     GetSoftenedFloat(N->getOperand(1 + Offset)) };
+  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+  TargetLowering::MakeLibCallOptions CallOptions;
+  EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
+                   N->getOperand(1 + Offset).getValueType() };
+  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops,
+                                                    CallOptions, SDLoc(N),
+                                                    Chain);
+  if (IsStrict)
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+  return Tmp.first;
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
   return BitConvertToInteger(N->getOperand(0));
 }
@@ -175,84 +240,48 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
-                     GetSoftenedFloat(N->getOperand(1)) };
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[2] = { N->getOperand(0).getValueType(),
-                   N->getOperand(1).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
-                                           RTLIB::FMIN_F32,
-                                           RTLIB::FMIN_F64,
-                                           RTLIB::FMIN_F80,
-                                           RTLIB::FMIN_F128,
-                                           RTLIB::FMIN_PPCF128),
-                         NVT, Ops, CallOptions, SDLoc(N)).first;
+  return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+                                               RTLIB::FMIN_F32,
+                                               RTLIB::FMIN_F64,
+                                               RTLIB::FMIN_F80,
+                                               RTLIB::FMIN_F128,
+                                               RTLIB::FMIN_PPCF128));
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
-                     GetSoftenedFloat(N->getOperand(1)) };
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[2] = { N->getOperand(0).getValueType(),
-                   N->getOperand(1).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
-                                           RTLIB::FMAX_F32,
-                                           RTLIB::FMAX_F64,
-                                           RTLIB::FMAX_F80,
-                                           RTLIB::FMAX_F128,
-                                           RTLIB::FMAX_PPCF128),
-                         NVT, Ops, CallOptions, SDLoc(N)).first;
+  return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+                                               RTLIB::FMAX_F32,
+                                               RTLIB::FMAX_F64,
+                                               RTLIB::FMAX_F80,
+                                               RTLIB::FMAX_F128,
+                                               RTLIB::FMAX_PPCF128));
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
-                     GetSoftenedFloat(N->getOperand(1)) };
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[2] = { N->getOperand(0).getValueType(),
-                   N->getOperand(1).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT,
N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::ADD_F32, - RTLIB::ADD_F64, - RTLIB::ADD_F80, - RTLIB::ADD_F128, - RTLIB::ADD_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, RTLIB::CBRT_F64, RTLIB::CBRT_F80, RTLIB::CBRT_F128, - RTLIB::CBRT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + RTLIB::CBRT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::CEIL_F32, - RTLIB::CEIL_F64, - RTLIB::CEIL_F80, - RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { @@ -304,212 +333,150 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, - RTLIB::COS_F64, - RTLIB::COS_F80, - RTLIB::COS_F128, - RTLIB::COS_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_F128, + RTLIB::COS_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_F128, - RTLIB::DIV_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { - EVT NVT = 
TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, - RTLIB::EXP_F64, - RTLIB::EXP_F80, - RTLIB::EXP_F128, - RTLIB::EXP_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, - RTLIB::EXP2_F64, - RTLIB::EXP2_F80, - RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, - RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, - RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, - RTLIB::LOG_F64, - RTLIB::LOG_F80, - RTLIB::LOG_F128, - RTLIB::LOG_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, - RTLIB::LOG2_F64, - RTLIB::LOG2_F80, - RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + 
 RTLIB::LOG2_F80,
+                                              RTLIB::LOG2_F128,
+                                              RTLIB::LOG2_PPCF128));
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Op = GetSoftenedFloat(N->getOperand(0));
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
-                                           RTLIB::LOG10_F32,
-                                           RTLIB::LOG10_F64,
-                                           RTLIB::LOG10_F80,
-                                           RTLIB::LOG10_F128,
-                                           RTLIB::LOG10_PPCF128),
-                         NVT, Op, CallOptions, SDLoc(N)).first;
+  return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+                                              RTLIB::LOG10_F32,
+                                              RTLIB::LOG10_F64,
+                                              RTLIB::LOG10_F80,
+                                              RTLIB::LOG10_F128,
+                                              RTLIB::LOG10_PPCF128));
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+  bool IsStrict = N->isStrictFPOpcode();
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
-                     GetSoftenedFloat(N->getOperand(1)),
-                     GetSoftenedFloat(N->getOperand(2)) };
+  unsigned Offset = IsStrict ? 1 : 0;
+  SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+                     GetSoftenedFloat(N->getOperand(1 + Offset)),
+                     GetSoftenedFloat(N->getOperand(2 + Offset)) };
+  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
   TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[3] = { N->getOperand(0).getValueType(),
-                   N->getOperand(1).getValueType(),
-                   N->getOperand(2).getValueType() };
+  EVT OpsVT[3] = { N->getOperand(0 + Offset).getValueType(),
+                   N->getOperand(1 + Offset).getValueType(),
+                   N->getOperand(2 + Offset).getValueType() };
   CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
-                                           RTLIB::FMA_F32,
-                                           RTLIB::FMA_F64,
-                                           RTLIB::FMA_F80,
-                                           RTLIB::FMA_F128,
-                                           RTLIB::FMA_PPCF128),
-                         NVT, Ops, CallOptions, SDLoc(N)).first;
+  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG,
+                                                    GetFPLibCall(N->getValueType(0),
+                                                                 RTLIB::FMA_F32,
+                                                                 RTLIB::FMA_F64,
+                                                                 RTLIB::FMA_F80,
+                                                                 RTLIB::FMA_F128,
+                                                                 RTLIB::FMA_PPCF128),
+                                                    NVT, Ops, CallOptions,
+                                                    SDLoc(N), Chain);
+  if (IsStrict)
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+  return Tmp.first;
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
-                     GetSoftenedFloat(N->getOperand(1)) };
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[2] = { N->getOperand(0).getValueType(),
-                   N->getOperand(1).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
-                                           RTLIB::MUL_F32,
-                                           RTLIB::MUL_F64,
-                                           RTLIB::MUL_F80,
-                                           RTLIB::MUL_F128,
-                                           RTLIB::MUL_PPCF128),
-                         NVT, Ops, CallOptions, SDLoc(N)).first;
+  return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+                                               RTLIB::MUL_F32,
+                                               RTLIB::MUL_F64,
+                                               RTLIB::MUL_F80,
+                                               RTLIB::MUL_F128,
+                                               RTLIB::MUL_PPCF128));
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Op = GetSoftenedFloat(N->getOperand(0));
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
-                                           RTLIB::NEARBYINT_F32,
-
RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - EVT FloatVT = N->getValueType(0); - if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { - // Expand Y = FNEG(X) -> Y = X ^ sign mask - APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); - return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), - DAG.getConstant(SignMask, dl, NVT)); - } - - // Expand Y = FNEG(X) -> Y = SUB -0.0, X - SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), - GetSoftenedFloat(N->getOperand(0)) }; - TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - NVT, Ops, CallOptions, dl).first; + // Expand Y = FNEG(X) -> Y = X ^ sign mask + APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); + return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), + DAG.getConstant(SignMask, dl, NVT)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = N->getOperand(0); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's // entirely possible for both f16 and f32 to be legal, so use the fully // hard-float FP_EXTEND rather than FP16_TO_FP. if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { - Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + if (IsStrict) { + Op = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), + { MVT::f32, MVT::Other }, { Chain, Op }); + Chain = Op.getValue(1); + } else { + Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + } + if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat) AddToWorklist(Op.getNode()); } @@ -526,9 +493,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(IsStrict ? 
 1 : 0).getValueType();
+  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+                                                    CallOptions, SDLoc(N),
+                                                    Chain);
+  if (IsStrict)
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+  return Tmp.first;
 }

 // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -551,41 +523,36 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+  bool IsStrict = N->isStrictFPOpcode();
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Op = N->getOperand(0);
-  if (N->getValueType(0) == MVT::f16) {
-    // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a
-    // storage-only type get a chance to select things.
-    return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op);
-  }
-
+  SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
   RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
   TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
+  EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
+  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+                                                    CallOptions, SDLoc(N),
+                                                    Chain);
+  if (IsStrict)
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+  return Tmp.first;
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
-                     GetSoftenedFloat(N->getOperand(1)) };
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[2] = { N->getOperand(0).getValueType(),
-                   N->getOperand(1).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
-                                           RTLIB::POW_F32,
-                                           RTLIB::POW_F64,
-                                           RTLIB::POW_F80,
-                                           RTLIB::POW_F128,
-                                           RTLIB::POW_PPCF128),
-                         NVT, Ops, CallOptions, SDLoc(N)).first;
+  return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+                                               RTLIB::POW_F32,
+                                               RTLIB::POW_F64,
+                                               RTLIB::POW_F80,
+                                               RTLIB::POW_F128,
+                                               RTLIB::POW_PPCF128));
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
-  assert(N->getOperand(1).getValueType() == MVT::i32 &&
+  bool IsStrict = N->isStrictFPOpcode();
+  unsigned Offset = IsStrict ? 1 : 0;
+  assert(N->getOperand(1 + Offset).getValueType() == MVT::i32 &&
          "Unsupported power type!");
   RTLIB::Libcall LC = GetFPLibCall(N->getValueType(0),
                                    RTLIB::POWI_F32,
@@ -601,124 +568,82 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
   }

   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+                     N->getOperand(1 + Offset) };
+  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
   TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[2] = { N->getOperand(0).getValueType(),
-                   N->getOperand(1).getValueType() };
+  EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
+                   N->getOperand(1 + Offset).getValueType() };
   CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, SDLoc(N)).first;
+  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops,
+                                                    CallOptions, SDLoc(N),
+                                                    Chain);
+  if (IsStrict)
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+  return Tmp.first;
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
-                     GetSoftenedFloat(N->getOperand(1)) };
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[2] = { N->getOperand(0).getValueType(),
-                   N->getOperand(1).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
-                                           RTLIB::REM_F32,
-                                           RTLIB::REM_F64,
-                                           RTLIB::REM_F80,
-                                           RTLIB::REM_F128,
-                                           RTLIB::REM_PPCF128),
-                         NVT, Ops, CallOptions, SDLoc(N)).first;
+  return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+                                               RTLIB::REM_F32,
+                                               RTLIB::REM_F64,
+                                               RTLIB::REM_F80,
+                                               RTLIB::REM_F128,
+                                               RTLIB::REM_PPCF128));
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Op = GetSoftenedFloat(N->getOperand(0));
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
-                                           RTLIB::RINT_F32,
-                                           RTLIB::RINT_F64,
-                                           RTLIB::RINT_F80,
-                                           RTLIB::RINT_F128,
-                                           RTLIB::RINT_PPCF128),
-                         NVT, Op, CallOptions, SDLoc(N)).first;
+  return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+                                              RTLIB::RINT_F32,
+                                              RTLIB::RINT_F64,
+                                              RTLIB::RINT_F80,
+                                              RTLIB::RINT_F128,
+                                              RTLIB::RINT_PPCF128));
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Op = GetSoftenedFloat(N->getOperand(0));
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
-                                           RTLIB::ROUND_F32,
-                                           RTLIB::ROUND_F64,
-                                           RTLIB::ROUND_F80,
-                                           RTLIB::ROUND_F128,
-                                           RTLIB::ROUND_PPCF128),
-                         NVT, Op, CallOptions, SDLoc(N)).first;
+  return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+                                              RTLIB::ROUND_F32,
+                                              RTLIB::ROUND_F64,
+                                              RTLIB::ROUND_F80,
+                                              RTLIB::ROUND_F128,
+                                              RTLIB::ROUND_PPCF128));
 }

 SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue Op = GetSoftenedFloat(N->getOperand(0));
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
-                                           RTLIB::SIN_F32,
-                                           RTLIB::SIN_F64,
-                                           RTLIB::SIN_F80,
-                                           RTLIB::SIN_F128,
-                                           RTLIB::SIN_PPCF128),
-                         NVT, Op, CallOptions, SDLoc(N)).first;
+  return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+                                              RTLIB::SIN_F32,
+
RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, - RTLIB::SQRT_F64, - RTLIB::SQRT_F80, - RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - if (N->getValueType(0) == MVT::f16) - return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); - - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, - RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, - RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { @@ -845,18 +770,25 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; - case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::STRICT_LROUND: case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break; + case ISD::STRICT_LLROUND: case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; + case ISD::STRICT_LRINT: case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; + case ISD::STRICT_LLRINT: case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = 
 SoftenFloatOp_SETCC(N); break;
   case ISD::STORE:     Res = SoftenFloatOp_STORE(N, OpNo); break;
+  case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break;
   }

   // If the result is null, the sub-method took care of registering results etc.
@@ -868,7 +800,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
     return true;

   assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
-         "Invalid operand promotion");
+         "Invalid operand softening");

   ReplaceValueWith(SDValue(N, 0), Res);
   return false;
@@ -880,42 +812,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
   return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
 }

-SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
-  // If we get here, the result must be legal but the source illegal.
-  EVT SVT = N->getOperand(0).getValueType();
-  EVT RVT = N->getValueType(0);
-  SDValue Op = GetSoftenedFloat(N->getOperand(0));
-
-  if (SVT == MVT::f16)
-    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op);
-
-  RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
-  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
-
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
-}
-
-
 SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
   // We actually deal with the partially-softened FP_TO_FP16 node too, which
   // returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
-  assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16);
+  assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
+         N->getOpcode() == ISD::STRICT_FP_ROUND);

-  EVT SVT = N->getOperand(0).getValueType();
+  bool IsStrict = N->isStrictFPOpcode();
+  SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+  EVT SVT = Op.getValueType();
   EVT RVT = N->getValueType(0);
   EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;

   RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");

-  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+  Op = GetSoftenedFloat(Op);
   TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
+  CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
+                                                    CallOptions, SDLoc(N),
+                                                    Chain);
+  if (IsStrict) {
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+    ReplaceValueWith(SDValue(N, 0), Tmp.first);
+    return SDValue();
+  }
+  return Tmp.first;
 }

 SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
@@ -943,8 +867,12 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
 }

 SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
-  bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
-  EVT SVT = N->getOperand(0).getValueType();
+  bool IsStrict = N->isStrictFPOpcode();
+  bool Signed = N->getOpcode() == ISD::FP_TO_SINT ||
+                N->getOpcode() == ISD::STRICT_FP_TO_SINT;
+
+  SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+  EVT SVT = Op.getValueType();
   EVT RVT = N->getValueType(0);
   EVT NVT = EVT();
   SDLoc dl(N);
@@ -960,18 +888,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
     NVT = (MVT::SimpleValueType)IntVT;
     // The type needs to be big enough to hold the result.
     if (NVT.bitsGE(RVT))
-      LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT);
+      LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT);
   }
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");

-  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  Op = GetSoftenedFloat(Op);
+  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
   TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first;
+  CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+                                                    CallOptions, dl, Chain);

   // Truncate the result if the libcall returns a larger type.
-  return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
+  SDValue Res = DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first);
+
+  if (!IsStrict)
+    return Res;
+
+  ReplaceValueWith(SDValue(N, 1), Tmp.second);
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return SDValue();
 }

 SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
@@ -1039,72 +975,99 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
                            ST->getMemOperand());
 }

-SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) {
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = BitConvertToInteger(N->getOperand(1));
+  SDLoc dl(N);

-  SDValue Op = GetSoftenedFloat(N->getOperand(0));
-  EVT RetVT = N->getOperand(0).getValueType();
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
-                                           RTLIB::LROUND_F32,
-                                           RTLIB::LROUND_F64,
-                                           RTLIB::LROUND_F80,
-                                           RTLIB::LROUND_F128,
-                                           RTLIB::LROUND_PPCF128),
-                         NVT, Op, CallOptions, SDLoc(N)).first;
+  EVT LVT = LHS.getValueType();
+  EVT ILVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits());
+  EVT RVT = RHS.getValueType();
+
+  unsigned LSize = LVT.getSizeInBits();
+  unsigned RSize = RVT.getSizeInBits();
+
+  // Shift right or sign-extend it if the two operands have different types.
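+  // E.g. softening copysign with an f32 LHS and an f64 RHS: the RHS sign sits
+  // in bit 63 of its i64 bit pattern, so shifting right by 32 and truncating
+  // to i32 moves it to bit 31, where the f32 sign bit lives.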
+  int SizeDiff = RSize - LSize;
+  if (SizeDiff > 0) {
+    RHS =
+        DAG.getNode(ISD::SRL, dl, RVT, RHS,
+                    DAG.getConstant(SizeDiff, dl,
+                                    TLI.getShiftAmountTy(RHS.getValueType(),
+                                                         DAG.getDataLayout())));
+    RHS = DAG.getNode(ISD::TRUNCATE, dl, ILVT, RHS);
+  } else if (SizeDiff < 0) {
+    RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, RHS);
+    RHS =
+        DAG.getNode(ISD::SHL, dl, ILVT, RHS,
+                    DAG.getConstant(-SizeDiff, dl,
+                                    TLI.getShiftAmountTy(RHS.getValueType(),
+                                                         DAG.getDataLayout())));
+  }
+
+  RHS = DAG.getBitcast(LVT, RHS);
+  return DAG.getNode(ISD::FCOPYSIGN, dl, LVT, LHS, RHS);
 }

-SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-
-  SDValue Op = GetSoftenedFloat(N->getOperand(0));
-  EVT RetVT = N->getOperand(0).getValueType();
+  bool IsStrict = N->isStrictFPOpcode();
+  unsigned Offset = IsStrict ? 1 : 0;
+  SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset));
+  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
   TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
-                                           RTLIB::LLROUND_F32,
-                                           RTLIB::LLROUND_F64,
-                                           RTLIB::LLROUND_F80,
-                                           RTLIB::LLROUND_F128,
-                                           RTLIB::LLROUND_PPCF128),
-                         NVT, Op, CallOptions, SDLoc(N)).first;
+  EVT OpVT = N->getOperand(0 + Offset).getValueType();
+  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+                                                    CallOptions, SDLoc(N),
+                                                    Chain);
+  if (IsStrict) {
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+    ReplaceValueWith(SDValue(N, 0), Tmp.first);
+    return SDValue();
+  }
+
+  return Tmp.first;
 }

-SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
+  EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+  return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+                                             RTLIB::LROUND_F32,
+                                             RTLIB::LROUND_F64,
+                                             RTLIB::LROUND_F80,
+                                             RTLIB::LROUND_F128,
+                                             RTLIB::LROUND_PPCF128));
+}

-  SDValue Op = GetSoftenedFloat(N->getOperand(0));
-  EVT RetVT = N->getOperand(0).getValueType();
-  TargetLowering::MakeLibCallOptions CallOptions;
-  EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
-  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
-                                           RTLIB::LRINT_F32,
-                                           RTLIB::LRINT_F64,
-                                           RTLIB::LRINT_F80,
-                                           RTLIB::LRINT_F128,
-                                           RTLIB::LRINT_PPCF128),
-                         NVT, Op, CallOptions, SDLoc(N)).first;
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+  EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+  return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+                                             RTLIB::LLROUND_F32,
+                                             RTLIB::LLROUND_F64,
+                                             RTLIB::LLROUND_F80,
+                                             RTLIB::LLROUND_F128,
+                                             RTLIB::LLROUND_PPCF128));
 }

-SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
-  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
+  EVT OpVT = N->getOperand(N->isStrictFPOpcode() ?
1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LRINT_F32, + RTLIB::LRINT_F64, + RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); +} - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType(); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, - RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; +SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, + RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); } //===----------------------------------------------------------------------===// @@ -1145,36 +1108,61 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::STRICT_FMINNUM: case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break; + case ISD::STRICT_FMAXNUM: case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; + case ISD::STRICT_FADD: case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break; + case ISD::STRICT_FCEIL: case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; + case ISD::STRICT_FCOS: case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::STRICT_FDIV: case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; + case ISD::STRICT_FEXP: case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break; + case ISD::STRICT_FEXP2: case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break; + case ISD::STRICT_FFLOOR: case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break; + case ISD::STRICT_FLOG: case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; + case ISD::STRICT_FLOG2: case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; + case ISD::STRICT_FLOG10: case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::STRICT_FMA: case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; + case ISD::STRICT_FMUL: case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; + case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break; + case ISD::STRICT_FPOW: case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; + case ISD::STRICT_FPOWI: case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::STRICT_FRINT: case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::STRICT_FROUND: case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; + case ISD::STRICT_FSIN: case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::STRICT_FSQRT: case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; + case ISD::STRICT_FSUB: case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::STRICT_FTRUNC: case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: 
 ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+  case ISD::STRICT_FREM:
   case ISD::FREM:       ExpandFloatRes_FREM(N, Lo, Hi); break;
   }

@@ -1198,6 +1186,36 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
                                  dl, NVT);
 }

+void DAGTypeLegalizer::ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC,
+                                            SDValue &Lo, SDValue &Hi) {
+  bool IsStrict = N->isStrictFPOpcode();
+  unsigned Offset = IsStrict ? 1 : 0;
+  SDValue Op = N->getOperand(0 + Offset);
+  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+  TargetLowering::MakeLibCallOptions CallOptions;
+  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0),
+                                                    Op, CallOptions, SDLoc(N),
+                                                    Chain);
+  if (IsStrict)
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+  GetPairElements(Tmp.first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
+                                             SDValue &Lo, SDValue &Hi) {
+  bool IsStrict = N->isStrictFPOpcode();
+  unsigned Offset = IsStrict ? 1 : 0;
+  SDValue Ops[] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset) };
+  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+  TargetLowering::MakeLibCallOptions CallOptions;
+  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0),
+                                                    Ops, CallOptions, SDLoc(N),
+                                                    Chain);
+  if (IsStrict)
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+  GetPairElements(Tmp.first, Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
                                            SDValue &Hi) {
   assert(N->getValueType(0) == MVT::ppcf128 &&
@@ -1214,190 +1232,159 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,

 void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo,
                                               SDValue &Hi) {
-  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
-                                         RTLIB::FMIN_F32, RTLIB::FMIN_F64,
-                                         RTLIB::FMIN_F80, RTLIB::FMIN_F128,
-                                         RTLIB::FMIN_PPCF128),
-                            N, false);
-  GetPairElements(Call, Lo, Hi);
+  ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+                                        RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+                                        RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+                                        RTLIB::FMIN_PPCF128), Lo, Hi);
 }

 void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo,
                                               SDValue &Hi) {
-  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
-                                         RTLIB::FMAX_F32, RTLIB::FMAX_F64,
-                                         RTLIB::FMAX_F80, RTLIB::FMAX_F128,
-                                         RTLIB::FMAX_PPCF128),
-                            N, false);
-  GetPairElements(Call, Lo, Hi);
+  ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+                                        RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+                                        RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+                                        RTLIB::FMAX_PPCF128), Lo, Hi);
 }

 void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
                                            SDValue &Hi) {
-  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
-                                         RTLIB::ADD_F32, RTLIB::ADD_F64,
-                                         RTLIB::ADD_F80, RTLIB::ADD_F128,
-                                         RTLIB::ADD_PPCF128),
-                            N, false);
-  GetPairElements(Call, Lo, Hi);
+  ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+                                        RTLIB::ADD_F32, RTLIB::ADD_F64,
+                                        RTLIB::ADD_F80, RTLIB::ADD_F128,
+                                        RTLIB::ADD_PPCF128), Lo, Hi);
 }

 void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo,
                                             SDValue &Hi) {
-  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32,
-                                         RTLIB::CBRT_F64, RTLIB::CBRT_F80,
-                                         RTLIB::CBRT_F128, RTLIB::CBRT_PPCF128),
-                            N, false);
-  GetPairElements(Call, Lo, Hi);
+  ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32,
+                                       RTLIB::CBRT_F64, RTLIB::CBRT_F80,
+                                       RTLIB::CBRT_F128,
+                                       RTLIB::CBRT_PPCF128), Lo, Hi);
 }

 void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, SDValue &Lo,
                                             SDValue &Hi) {
-  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
-                                         RTLIB::CEIL_F32,
RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::COPYSIGN_F32, - RTLIB::COPYSIGN_F64, - RTLIB::COPYSIGN_F80, - RTLIB::COPYSIGN_F128, - RTLIB::COPYSIGN_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::COPYSIGN_F32, + RTLIB::COPYSIGN_F64, + RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_F128, + RTLIB::COPYSIGN_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_F128, - RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, 
GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Ops[3] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset), + N->getOperand(2 + Offset) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + std::pair Tmp = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + SDLoc(N), Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + RTLIB::MUL_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, @@ -1412,106 +1399,105 @@ void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); + bool IsStrict = N->isStrictFPOpcode(); + + SDValue Chain; + if (IsStrict) { + // If the expanded type is the same as the input type, just bypass the node. 
+ if (NVT == N->getOperand(1).getValueType()) { + Hi = N->getOperand(1); + Chain = N->getOperand(0); + } else { + // Otherwise we need to extend. + Hi = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { NVT, MVT::Other }, + { N->getOperand(0), N->getOperand(1) }); + Chain = Hi.getValue(1); + } + } else { + Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); + } + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(NVT.getSizeInBits(), 0)), dl, NVT); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Chain); } void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N,
GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, @@ -1652,8 +1638,11 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break; case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break; @@ -1742,34 +1731,72 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { - assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + bool IsStrict = N->isStrictFPOpcode(); + assert(N->getOperand(IsStrict ? 1 : 0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); SDValue Lo, Hi; - GetExpandedFloat(N->getOperand(0), Lo, Hi); - // Round it the rest of the way (e.g. to f32) if needed. - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), - N->getValueType(0), Hi, N->getOperand(1)); + GetExpandedFloat(N->getOperand(IsStrict ? 1 : 0), Lo, Hi); + + if (!IsStrict) + // Round it the rest of the way (e.g. to f32) if needed. + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0), Hi, N->getOperand(1)); + + // Eliminate the node if the input float type is the same as the output float + // type. + if (Hi.getValueType() == N->getValueType(0)) { + // Connect the output chain to the input chain, unlinking the node. 
+ ReplaceValueWith(SDValue(N, 1), N->getOperand(0)); + ReplaceValueWith(SDValue(N, 0), Hi); + return SDValue(); + } + + SDValue Expansion = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), + {N->getValueType(0), MVT::Other}, + {N->getOperand(0), Hi, N->getOperand(2)}); + ReplaceValueWith(SDValue(N, 1), Expansion.getValue(1)); + ReplaceValueWith(SDValue(N, 0), Expansion); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first; + std::pair Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, dl, Chain); + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), - CallOptions, dl).first; + std::pair Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, dl, Chain); + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 56c13bb0753d2..dd082646ae5ab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -592,8 +592,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - N->getMask(), ExtPassThru, N->getMemoryVT(), - N->getMemOperand(), ISD::EXTLOAD); + N->getOffset(), N->getMask(), ExtPassThru, + N->getMemoryVT(), N->getMemOperand(), + N->getAddressingMode(), ISD::EXTLOAD); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -1485,11 +1486,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, SDLoc dl(N); bool TruncateStore = false; - if (OpNo == 3) { + if (OpNo == 4) { Mask = PromoteTargetBoolean(Mask, DataVT); // Update in place. 
SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[3] = Mask; + NewOps[4] = Mask; return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } else { // Data operand assert(OpNo == 1 && "Unexpected operand for promotion"); @@ -1497,14 +1498,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, TruncateStore = true; } - return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, - N->getMemoryVT(), N->getMemOperand(), + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), + N->getOffset(), Mask, N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), TruncateStore, N->isCompressingStore()); } SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo) { - assert(OpNo == 2 && "Only know how to promote the mask!"); + assert(OpNo == 3 && "Only know how to promote the mask!"); EVT DataVT = N->getValueType(0); SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); SmallVector NewOps(N->op_begin(), N->op_end()); @@ -1696,7 +1698,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; case ISD::STRICT_LLROUND: case ISD::STRICT_LLRINT: @@ -2562,7 +2566,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); @@ -2570,8 +2576,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair Tmp = TLI.makeLibCall(DAG, LC, VT, Op, + CallOptions, dl, Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, @@ -2579,15 +2589,21 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 
1 : 0); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair Tmp = TLI.makeLibCall(DAG, LC, VT, Op, + CallOptions, dl, Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 9ddcbc9065251..7a97d980f9e4f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -974,32 +974,6 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } -/// Convert the node into a libcall with the same prototype. -SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, - bool isSigned) { - TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(isSigned); - unsigned NumOps = N->getNumOperands(); - SDLoc dl(N); - if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions, - dl).first; - } else if (NumOps == 1) { - SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions, - dl).first; - } else if (NumOps == 2) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, - dl).first; - } - SmallVector Ops(NumOps); - for (unsigned i = 0; i < NumOps; ++i) - Ops[i] = N->getOperand(i); - - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; -} - /// Promote the given target boolean to a target boolean of the given type. /// A target boolean is an integer value, not necessarily of type i1, the bits /// of which conform to getBooleanContents. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c944bda3700bf..42597fcd12ecb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -215,7 +215,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); SDValue JoinIntegers(SDValue Lo, SDValue Hi); - SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); std::pair ExpandAtomic(SDNode *Node); @@ -483,6 +482,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { // Convert Float Results to Integer. void SoftenFloatResult(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC); + SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); @@ -528,9 +529,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { // Convert Float Operand to Integer. 
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); - SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); SDValue SoftenFloatOp_LROUND(SDNode *N); @@ -540,6 +541,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Float Expansion Support: LegalizeFloatTypes.cpp @@ -557,6 +559,10 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { // Float Result Expansion. void ExpandFloatResult(SDNode *N, unsigned ResNo); void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7bca3ea888ec4..9403b344ea747 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1541,12 +1541,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi) { + assert(MLD->isUnindexed() && "Indexed masked load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(MLD); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); SDValue Ch = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); + SDValue Offset = MLD->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked load offset"); SDValue Mask = MLD->getMask(); SDValue PassThru = MLD->getPassThru(); unsigned Alignment = MLD->getOriginalAlignment(); @@ -1578,8 +1581,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); @@ -1590,8 +1594,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); // Build a factor node to remember that this load is independent of the // other one. 
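The same mechanical migration applies at every getMaskedLoad/getMaskedStore call site this patch touches: unindexed users now pass an undef offset and ISD::UNINDEXED. A minimal sketch of an updated unindexed call site follows; the variable names are illustrative, not taken from the patch.
// Sketch only: creating an unindexed masked load with the widened API.
// Assumes DAG, dl, Chain, Ptr, Mask, PassThru, VT and MMO are in scope,
// as in any SelectionDAG lowering routine.
SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); // unused for UNINDEXED
SDValue Load =
    DAG.getMaskedLoad(VT, dl, Chain, Ptr, Offset, Mask, PassThru, VT, MMO,
                      ISD::UNINDEXED, ISD::NON_EXTLOAD, /*IsExpanding=*/false);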
@@ -2326,8 +2331,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed masked store of vector?"); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); + SDValue Offset = N->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked store offset"); SDValue Mask = N->getMask(); SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); @@ -2361,8 +2369,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - N->isTruncatingStore(), + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, @@ -2374,8 +2382,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - N->isTruncatingStore(), N->isCompressingStore()); + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); // Build a factor node to remember that this store is independent of the // other one. @@ -3699,10 +3708,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { WidenVT.getVectorNumElements()); Mask = ModifyToType(Mask, WideMaskVT, true); - SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), - Mask, PassThru, N->getMemoryVT(), - N->getMemOperand(), ExtType, - N->isExpandingLoad()); + SDValue Res = DAG.getMaskedLoad( + WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + ExtType, N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -4447,7 +4456,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { StVal.getValueType().getVectorNumElements() && "Mask and data vectors should have the same number of elements"); return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(), - Mask, MST->getMemoryVT(), MST->getMemOperand(), + MST->getOffset(), Mask, MST->getMemoryVT(), + MST->getMemOperand(), MST->getAddressingMode(), false, MST->isCompressingStore()); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f1b88d80f43be..c1c599c5a5d83 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3350,20 +3350,20 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, KnownBits N0Known = computeKnownBits(N0); bool overflow; - (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow); + (void)N0Known.getMaxValue().uadd_ov(N1Known.getMaxValue(), overflow); if (!overflow) return OFK_Never; } // mulhi + 1 never overflow if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && - (~N1Known.Zero & 0x01) == ~N1Known.Zero) + (N1Known.getMaxValue() & 0x01) == N1Known.getMaxValue()) return OFK_Never; if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) { KnownBits N0Known = computeKnownBits(N0); - if ((~N0Known.Zero & 0x01) == ~N0Known.Zero) + if ((N0Known.getMaxValue() & 0x01) == N0Known.getMaxValue()) return OFK_Never; } @@ -6975,16 +6975,22 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, } SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, - SDValue Ptr, SDValue Mask, SDValue PassThru, - EVT MemVT, MachineMemOperand *MMO, + SDValue Base, SDValue Offset, SDValue Mask, + SDValue PassThru, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, ISD::LoadExtType ExtTy, bool isExpanding) { - SDVTList VTs = getVTList(VT, MVT::Other); - SDValue Ops[] = { Chain, Ptr, Mask, PassThru }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && + "Unindexed masked load with an offset!"); + SDVTList VTs = Indexed ? 
getVTList(VT, Base.getValueType(), MVT::Other) + : getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( - dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO)); + dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -6992,7 +6998,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return SDValue(E, 0); } auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - ExtTy, isExpanding, MemVT, MMO); + AM, ExtTy, isExpanding, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -7002,27 +7008,45 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return V; } +SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedLoadSDNode *LD = cast<MaskedLoadSDNode>(OrigLoad); + assert(LD->getOffset().isUndef() && "Masked load is already an indexed load!"); + return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base, + Offset, LD->getMask(), LD->getPassThru(), + LD->getMemoryVT(), LD->getMemOperand(), AM, + LD->getExtensionType(), LD->isExpandingLoad()); +} + SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, - SDValue Val, SDValue Ptr, SDValue Mask, - EVT MemVT, MachineMemOperand *MMO, - bool IsTruncating, bool IsCompressing) { + SDValue Val, SDValue Base, SDValue Offset, + SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, bool IsTruncating, + bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - SDVTList VTs = getVTList(MVT::Other); - SDValue Ops[] = { Chain, Val, Ptr, Mask }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && + "Unindexed masked store with an offset!"); + SDVTList VTs = Indexed ?
getVTList(Base.getValueType(), MVT::Other) + : getVTList(MVT::Other); + SDValue Ops[] = {Chain, Val, Base, Offset, Mask}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( - dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO)); + dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - IsTruncating, IsCompressing, MemVT, MMO); + auto *N = + newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + IsTruncating, IsCompressing, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -7032,6 +7056,17 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, return V; } +SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedStoreSDNode *ST = cast<MaskedStoreSDNode>(OrigStore); + assert(ST->getOffset().isUndef() && + "Masked store is already an indexed store!"); + return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset, + ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(), + AM, ST->isTruncatingStore(), ST->isCompressingStore()); +} + SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, @@ -7287,8 +7322,40 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); -#if 0 switch (Opcode) { + case ISD::STRICT_FP_EXTEND: + assert(VTList.NumVTs == 2 && Ops.size() == 2 && + "Invalid STRICT_FP_EXTEND!"); + assert(VTList.VTs[0].isFloatingPoint() && + Ops[1].getValueType().isFloatingPoint() && "Invalid FP cast!"); + assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && + "STRICT_FP_EXTEND result type should be vector iff the operand " + "type is vector!"); + assert((!VTList.VTs[0].isVector() || + VTList.VTs[0].getVectorNumElements() == + Ops[1].getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) && + "Invalid fpext node, dst <= src!"); + break; + case ISD::STRICT_FP_ROUND: + assert(VTList.NumVTs == 2 && Ops.size() == 3 && "Invalid STRICT_FP_ROUND!"); + assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && + "STRICT_FP_ROUND result type should be vector iff the operand " + "type is vector!"); + assert((!VTList.VTs[0].isVector() || + VTList.VTs[0].getVectorNumElements() == + Ops[1].getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(VTList.VTs[0].isFloatingPoint() && + Ops[1].getValueType().isFloatingPoint() && + VTList.VTs[0].bitsLT(Ops[1].getValueType()) && + isa<ConstantSDNode>(Ops[2]) && + (cast<ConstantSDNode>(Ops[2])->getZExtValue() == 0 || + cast<ConstantSDNode>(Ops[2])->getZExtValue() == 1) && + "Invalid STRICT_FP_ROUND!"); + break; +#if 0 // FIXME: figure out how to safely handle things like // int foo(int x) { return 1 << (x & 255); } // int bar() { return foo(256); } @@ -7307,8 +7374,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); } break; - } #endif + } // Memoize the node unless it returns a flag.
SDNode *N; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1ed0dc2c979fc..0aeb3c14aa370 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4295,6 +4295,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4311,9 +4312,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // vectors. VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo); - SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, - MMO, false /* Truncating */, - IsCompressing); + SDValue StoreNode = + DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, + ISD::UNINDEXED, false /* Truncating */, IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } @@ -4461,6 +4462,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4491,8 +4493,9 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo, Ranges); - SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, - ISD::NON_EXTLOAD, IsExpanding); + SDValue Load = + DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, + ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding); if (AddToChain) PendingLoads.push_back(Load.getValue(1)); setValue(&I, Load); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index bc10f76212394..f863d9876486b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -685,6 +685,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (doExt) OS << " from " << MLd->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MLd->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MLd->isExpandingLoad()) OS << ", expanding"; @@ -696,6 +700,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (MSt->isTruncatingStore()) OS << ", trunc to " << MSt->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MSt->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MSt->isCompressingStore()) OS << ", compressing"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 79dbd44bb4772..a03f7923d71e1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -148,17 +148,17 @@ static cl::opt ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, cl::desc("Pop up a window to show dags before legalize types")); static cl::opt -ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, - cl::desc("Pop up a window to show dags before legalize")); + ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post " + "legalize types dag combine pass")); +static cl::opt + 
ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); static cl::opt ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, cl::desc("Pop up a window to show dags before the second " "dag combine pass")); static cl::opt -ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, - cl::desc("Pop up a window to show dags before the post legalize types" - " dag combine pass")); -static cl::opt ViewISelDAGs("view-isel-dags", cl::Hidden, cl::desc("Pop up a window to show isel dags as they are selected")); static cl::opt @@ -168,12 +168,10 @@ static cl::opt ViewSUnitDAGs("view-sunit-dags", cl::Hidden, cl::desc("Pop up a window to show SUnit dags after they are processed")); #else -static const bool ViewDAGCombine1 = false, - ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, - ViewDAGCombine2 = false, - ViewDAGCombineLT = false, - ViewISelDAGs = false, ViewSchedDAGs = false, - ViewSUnitDAGs = false; +static const bool ViewDAGCombine1 = false, ViewLegalizeTypesDAGs = false, + ViewDAGCombineLT = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, ViewISelDAGs = false, + ViewSchedDAGs = false, ViewSUnitDAGs = false; #endif //===---------------------------------------------------------------------===// @@ -790,8 +788,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { FuncInfo->MBB->getBasicBlock()->getName()); #endif #ifdef NDEBUG - if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || - ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || + if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewDAGCombineLT || + ViewLegalizeDAGs || ViewDAGCombine2 || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) #endif { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c24a3670c9867..1e51ec2d6a219 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6225,6 +6225,26 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, } } + // If none of the above worked, but there are no NaNs, then expand to + // a compare/select sequence. This is required for correctness since + // InstCombine might have canonicalized a fcmp+select sequence to a + // FMINNUM/FMAXNUM node. If we were to fall through to the default + // expansion to libcall, we might introduce a link-time dependency + // on libm into a file that originally did not have one. + if (Node->getFlags().hasNoNaNs()) { + ISD::CondCode Pred = + Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; + SDValue Op1 = Node->getOperand(0); + SDValue Op2 = Node->getOperand(1); + SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred); + // Copy FMF flags, but always set the no-signed-zeros flag + // as this is implied by the FMINNUM/FMAXNUM semantics. 
+ SDNodeFlags Flags = Node->getFlags(); + Flags.setNoSignedZeros(true); + SelCC->setFlags(Flags); + return SelCC; + } + return SDValue(); } diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index af7dc432eae5b..cc436fcc4f684 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -633,6 +633,8 @@ void TargetLoweringBase::initActions() { IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { setIndexedLoadAction(IM, VT, Expand); setIndexedStoreAction(IM, VT, Expand); + setIndexedMaskedLoadAction(IM, VT, Expand); + setIndexedMaskedStoreAction(IM, VT, Expand); } // Most backends expect to see the node which just returns the value loaded. diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 7b547d41fb60b..41cb511ad9b47 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -179,10 +179,10 @@ static cl::opt UseCFLAA( /// Option names for limiting the codegen pipeline. /// Those are used in error reporting and we didn't want /// to duplicate their names all over the place. -static const char *StartAfterOptName = "start-after"; -static const char *StartBeforeOptName = "start-before"; -static const char *StopAfterOptName = "stop-after"; -static const char *StopBeforeOptName = "stop-before"; +static const char StartAfterOptName[] = "start-after"; +static const char StartBeforeOptName[] = "start-before"; +static const char StopAfterOptName[] = "stop-after"; +static const char StopBeforeOptName[] = "stop-before"; static cl::opt<std::string> StartAfterOpt(StringRef(StartAfterOptName), diff --git a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp b/llvm/lib/CodeGen/TypePromotion.cpp similarity index 78% rename from llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp rename to llvm/lib/CodeGen/TypePromotion.cpp index 1c2c8aef55bb8..94fe7d2c70304 100644 --- a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -1,4 +1,4 @@ -//===----- ARMCodeGenPrepare.cpp ------------------------------------------===// +//===----- TypePromotion.cpp ----------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,23 +7,25 @@ //===----------------------------------------------------------------------===// // /// \file -/// This pass inserts intrinsics to handle small types that would otherwise be -/// promoted during legalization. Here we can manually promote types or insert -/// intrinsics which can handle narrow types that aren't supported by the -/// register classes. -// +/// This is an opcode-based type promotion pass for small types that would +/// otherwise be promoted during legalization. This works around the limitations +/// of SelectionDAG for cyclic regions. The search begins from icmp +/// instruction operands, where a tree, consisting of non-wrapping or safe +/// wrapping instructions, is built, checked and promoted if possible.
+/// //===----------------------------------------------------------------------===// -#include "ARM.h" -#include "ARMSubtarget.h" -#include "ARMTargetMachine.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -32,26 +34,19 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#define DEBUG_TYPE "arm-codegenprepare" +#define DEBUG_TYPE "type-promotion" +#define PASS_NAME "Type Promotion" using namespace llvm; static cl::opt -DisableCGP("arm-disable-cgp", cl::Hidden, cl::init(true), - cl::desc("Disable ARM specific CodeGenPrepare pass")); - -static cl::opt -EnableDSP("arm-enable-scalar-dsp", cl::Hidden, cl::init(false), - cl::desc("Use DSP instructions for scalar operations")); - -static cl::opt -EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false), - cl::desc("Use DSP instructions for scalar operations\ - with immediate operands")); +DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(true), + cl::desc("Disable type promotion pass")); // The goal of this pass is to enable more efficient code generation for // operations on narrow types (i.e. types with < 32-bits) and this is a @@ -111,7 +106,6 @@ class IRPromoter { SmallPtrSet InstsToRemove; DenseMap> TruncTysMap; SmallPtrSet Promoted; - Module *M = nullptr; LLVMContext &Ctx; // The type we promote to: always i32 IntegerType *ExtTy = nullptr; @@ -134,11 +128,10 @@ class IRPromoter { void Cleanup(void); public: - IRPromoter(Module *M) : M(M), Ctx(M->getContext()), - ExtTy(Type::getInt32Ty(Ctx)) { } + IRPromoter(Module *M) : Ctx(M->getContext()) { } - void Mutate(Type *OrigTy, + void Mutate(Type *OrigTy, unsigned PromotedWidth, SetVector &Visited, SmallPtrSetImpl &Sources, SmallPtrSetImpl &Sinks, @@ -146,30 +139,29 @@ class IRPromoter { SmallPtrSetImpl &SafeWrap); }; -class ARMCodeGenPrepare : public FunctionPass { - const ARMSubtarget *ST = nullptr; +class TypePromotion : public FunctionPass { IRPromoter *Promoter = nullptr; - std::set AllVisited; + SmallPtrSet AllVisited; SmallPtrSet SafeToPromote; SmallPtrSet SafeWrap; bool isSafeWrap(Instruction *I); bool isSupportedValue(Value *V); bool isLegalToPromote(Value *V); - bool TryToPromote(Value *V); + bool TryToPromote(Value *V, unsigned PromotedWidth); public: static char ID; static unsigned TypeSize; Type *OrigTy = nullptr; - ARMCodeGenPrepare() : FunctionPass(ID) {} + TypePromotion() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); } - StringRef getPassName() const override { return "ARM IR optimizations"; } + StringRef getPassName() const override { return PASS_NAME; } bool doInitialization(Module &M) override; bool runOnFunction(Function &F) override; @@ -188,19 +180,19 @@ static bool GenerateSignBits(Value *V) { } static bool EqualTypeSize(Value *V) { - return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize; + return V->getType()->getScalarSizeInBits() == TypePromotion::TypeSize; } static 
bool LessOrEqualTypeSize(Value *V) { - return V->getType()->getScalarSizeInBits() <= ARMCodeGenPrepare::TypeSize; + return V->getType()->getScalarSizeInBits() <= TypePromotion::TypeSize; } static bool GreaterThanTypeSize(Value *V) { - return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize; + return V->getType()->getScalarSizeInBits() > TypePromotion::TypeSize; } static bool LessThanTypeSize(Value *V) { - return V->getType()->getScalarSizeInBits() < ARMCodeGenPrepare::TypeSize; + return V->getType()->getScalarSizeInBits() < TypePromotion::TypeSize; } /// Some instructions can use 8- and 16-bit operands, and we don't need to @@ -278,7 +270,7 @@ static bool isSink(Value *V) { } /// Return whether this instruction can safely wrap. -bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) { +bool TypePromotion::isSafeWrap(Instruction *I) { // We can support a, potentially, wrapping instruction (I) if: // - It is only used by an unsigned icmp. // - The icmp uses a constant. @@ -374,7 +366,7 @@ bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) { Total += OverflowConst->getValue().getBitWidth() < 32 ? OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs(); - APInt Max = APInt::getAllOnesValue(ARMCodeGenPrepare::TypeSize); + APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize); if (Total.getBitWidth() > Max.getBitWidth()) { if (Total.ugt(Max.zext(Total.getBitWidth()))) @@ -385,7 +377,7 @@ bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) { } else if (Total.ugt(Max)) return false; - LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for " << *I << "\n"); SafeWrap.insert(I); return true; } @@ -422,32 +414,12 @@ static bool isPromotedResultSafe(Value *V) { return cast(V)->hasNoUnsignedWrap(); } -/// Return the intrinsic for the instruction that can perform the same -/// operation but on a narrow type. This is using the parallel dsp intrinsics -/// on scalar values. -static Intrinsic::ID getNarrowIntrinsic(Instruction *I) { - // Whether we use the signed or unsigned versions of these intrinsics - // doesn't matter because we're not using the GE bits that they set in - // the APSR. - switch(I->getOpcode()) { - default: - break; - case Instruction::Add: - return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_uadd16 : - Intrinsic::arm_uadd8; - case Instruction::Sub: - return ARMCodeGenPrepare::TypeSize == 16 ? 
Intrinsic::arm_usub16 : - Intrinsic::arm_usub8; - } - llvm_unreachable("unhandled opcode for narrow intrinsic"); -} - void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { SmallVector Users; Instruction *InstTo = dyn_cast(To); bool ReplacedAll = true; - LLVM_DEBUG(dbgs() << "ARM CGP: Replacing " << *From << " with " << *To + LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To << "\n"); for (Use &U : From->uses()) { @@ -468,7 +440,7 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { } void IRPromoter::PrepareWrappingAdds() { - LLVM_DEBUG(dbgs() << "ARM CGP: Prepare underflowing adds.\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n"); IRBuilder<> Builder{Ctx}; // For adds that safely wrap and use a negative immediate as operand 1, we @@ -479,7 +451,7 @@ void IRPromoter::PrepareWrappingAdds() { if (I->getOpcode() != Instruction::Add) continue; - LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n"); assert((isa(I->getOperand(1)) && cast(I->getOperand(1))->isNegative()) && "Wrapping should have a negative immediate as the second operand"); @@ -494,7 +466,7 @@ void IRPromoter::PrepareWrappingAdds() { } InstsToRemove.insert(I); I->replaceAllUsesWith(NewVal); - LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n"); } for (auto *I : NewInsts) Visited->insert(I); @@ -505,7 +477,7 @@ void IRPromoter::ExtendSources() { auto InsertZExt = [&](Value *V, Instruction *InsertPt) { assert(V->getType() != ExtTy && "zext already extends to i32"); - LLVM_DEBUG(dbgs() << "ARM CGP: Inserting ZExt for " << *V << "\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n"); Builder.SetInsertPoint(InsertPt); if (auto *I = dyn_cast(V)) Builder.SetCurrentDebugLocation(I->getDebugLoc()); @@ -523,7 +495,7 @@ void IRPromoter::ExtendSources() { }; // Now, insert extending instructions between the sources and their users. - LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n"); for (auto V : *Sources) { LLVM_DEBUG(dbgs() << " - " << *V << "\n"); if (auto *I = dyn_cast(V)) @@ -539,7 +511,7 @@ void IRPromoter::ExtendSources() { } void IRPromoter::PromoteTree() { - LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n"); IRBuilder<> Builder{Ctx}; @@ -570,38 +542,10 @@ void IRPromoter::PromoteTree() { Promoted.insert(I); } } - - // Finally, any instructions that should be promoted but haven't yet been, - // need to be handled using intrinsics. - for (auto *V : *Visited) { - auto *I = dyn_cast(V); - if (!I) - continue; - - if (Sources->count(I) || Sinks->count(I)) - continue; - - if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I)) - continue; - - assert(EnableDSP && "DSP intrinisc insertion not enabled!"); - - // Replace unsafe instructions with appropriate intrinsic calls. 
- LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for " - << *I << "\n"); - Function *DSPInst = - Intrinsic::getDeclaration(M, getNarrowIntrinsic(I)); - Builder.SetInsertPoint(I); - Builder.SetCurrentDebugLocation(I->getDebugLoc()); - Value *Args[] = { I->getOperand(0), I->getOperand(1) }; - CallInst *Call = Builder.CreateCall(DSPInst, Args); - NewInsts.insert(Call); - ReplaceAllUsersOfWith(I, Call); - } } void IRPromoter::TruncateSinks() { - LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n"); IRBuilder<> Builder{Ctx}; @@ -612,7 +556,7 @@ void IRPromoter::TruncateSinks() { if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources->count(V)) return nullptr; - LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for " + LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for " << *V << "\n"); Builder.SetInsertPoint(cast(V)); auto *Trunc = dyn_cast(Builder.CreateTrunc(V, TruncTy)); @@ -624,7 +568,7 @@ void IRPromoter::TruncateSinks() { // Fix up any stores or returns that use the results of the promoted // chain. for (auto I : *Sinks) { - LLVM_DEBUG(dbgs() << "ARM CGP: For Sink: " << *I << "\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n"); // Handle calls separately as we need to iterate over arg operands. if (auto *Call = dyn_cast(I)) { @@ -661,7 +605,7 @@ void IRPromoter::TruncateSinks() { } void IRPromoter::Cleanup() { - LLVM_DEBUG(dbgs() << "ARM CGP: Cleanup..\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n"); // Some zexts will now have become redundant, along with their trunc // operands, so remove them for (auto V : *Visited) { @@ -674,7 +618,7 @@ void IRPromoter::Cleanup() { Value *Src = ZExt->getOperand(0); if (ZExt->getSrcTy() == ZExt->getDestTy()) { - LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast: " << *ZExt + LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt << "\n"); ReplaceAllUsersOfWith(ZExt, Src); continue; @@ -693,7 +637,7 @@ void IRPromoter::Cleanup() { } for (auto *I : InstsToRemove) { - LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n"); I->dropAllReferences(); I->eraseFromParent(); } @@ -707,7 +651,7 @@ void IRPromoter::Cleanup() { } void IRPromoter::ConvertTruncs() { - LLVM_DEBUG(dbgs() << "ARM CGP: Converting truncs..\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n"); IRBuilder<> Builder{Ctx}; for (auto *V : *Visited) { @@ -731,17 +675,18 @@ void IRPromoter::ConvertTruncs() { } } -void IRPromoter::Mutate(Type *OrigTy, +void IRPromoter::Mutate(Type *OrigTy, unsigned PromotedWidth, SetVector &Visited, SmallPtrSetImpl &Sources, SmallPtrSetImpl &Sinks, SmallPtrSetImpl &SafeToPromote, SmallPtrSetImpl &SafeWrap) { - LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " - << ARMCodeGenPrepare::TypeSize << " to 32-bits\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to from " + << TypePromotion::TypeSize << " to 32-bits\n"); assert(isa(OrigTy) && "expected integer type"); this->OrigTy = cast(OrigTy); + ExtTy = IntegerType::get(Ctx, PromotedWidth); assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() && "original type not smaller than extended type"); @@ -779,9 +724,7 @@ void IRPromoter::Mutate(Type *OrigTy, // Insert zext instructions between sources and their users. ExtendSources(); - // Promote visited instructions, mutating their types in place. 
Also insert - // DSP intrinsics, if enabled, for adds and subs which would be unsafe to - // promote. + // Promote visited instructions, mutating their types in place. PromoteTree(); // Convert any truncs, that aren't sources, into AND masks. @@ -794,14 +737,14 @@ void IRPromoter::Mutate(Type *OrigTy, // clear the data structures. Cleanup(); - LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n"); } /// We accept most instructions, as well as Arguments and ConstantInsts. We /// Disallow casts other than zext and truncs and only allow calls if their /// return value is zeroext. We don't allow opcodes that can introduce sign /// bits. -bool ARMCodeGenPrepare::isSupportedValue(Value *V) { +bool TypePromotion::isSupportedValue(Value *V) { if (auto *I = dyn_cast(V)) { switch (I->getOpcode()) { default: @@ -849,7 +792,7 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) { /// Check that the type of V would be promoted and that the original type is /// smaller than the targeted promoted type. Check that we're not trying to /// promote something larger than our base 'TypeSize' type. -bool ARMCodeGenPrepare::isLegalToPromote(Value *V) { +bool TypePromotion::isLegalToPromote(Value *V) { auto *I = dyn_cast(V); if (!I) @@ -862,47 +805,20 @@ bool ARMCodeGenPrepare::isLegalToPromote(Value *V) { SafeToPromote.insert(I); return true; } - - if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub) - return false; - - // If promotion is not safe, can we use a DSP instruction to natively - // handle the narrow type? - if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I)) - return false; - - if (ST->isThumb() && !ST->hasThumb2()) - return false; - - // TODO - // Would it be profitable? For Thumb code, these parallel DSP instructions - // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For - // Cortex-A, specifically Cortex-A72, the latency is double and throughput is - // halved. They also do not take immediates as operands. 
- for (auto &Op : I->operands()) { - if (isa(Op)) { - if (!EnableDSPWithImms) - return false; - } - } - LLVM_DEBUG(dbgs() << "ARM CGP: Will use an intrinsic for: " << *I << "\n"); - return true; + return false; } -bool ARMCodeGenPrepare::TryToPromote(Value *V) { +bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { OrigTy = V->getType(); TypeSize = OrigTy->getPrimitiveSizeInBits(); - if (TypeSize > 16 || TypeSize < 8) - return false; - SafeToPromote.clear(); SafeWrap.clear(); if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V)) return false; - LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = " - << TypeSize << "\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from " + << TypeSize << " bits to " << PromotedWidth << "\n"); SetVector WorkList; SmallPtrSet Sources; @@ -923,7 +839,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) { return true; if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) { - LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n"); return false; } @@ -979,7 +895,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) { } } - LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n"; + LLVM_DEBUG(dbgs() << "IR Promotion: Visited nodes:\n"; for (auto *I : CurrentVisited) I->dump(); ); @@ -995,28 +911,31 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) { if (ToPromote < 2) return false; - Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote, - SafeWrap); + Promoter->Mutate(OrigTy, PromotedWidth, CurrentVisited, Sources, Sinks, + SafeToPromote, SafeWrap); return true; } -bool ARMCodeGenPrepare::doInitialization(Module &M) { +bool TypePromotion::doInitialization(Module &M) { Promoter = new IRPromoter(&M); return false; } -bool ARMCodeGenPrepare::runOnFunction(Function &F) { - if (skipFunction(F) || DisableCGP) +bool TypePromotion::runOnFunction(Function &F) { + if (skipFunction(F) || DisablePromotion) return false; - auto *TPC = &getAnalysis(); + LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n"); + + auto *TPC = getAnalysisIfAvailable(); if (!TPC) return false; - const TargetMachine &TM = TPC->getTM(); - ST = &TM.getSubtarget(F); bool MadeChange = false; - LLVM_DEBUG(dbgs() << "ARM CGP: Running on " << F.getName() << "\n"); + const DataLayout &DL = F.getParent()->getDataLayout(); + const TargetMachine &TM = TPC->getTM(); + const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F); + const TargetLowering *TLI = SubtargetInfo->getTargetLowering(); // Search up from icmps to try to promote their operands. 
for (BasicBlock &BB : F) { @@ -1025,18 +944,30 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) { if (AllVisited.count(&I)) continue; - if (isa(I)) { - auto &CI = cast(I); + if (!isa(&I)) + continue; + + auto *ICmp = cast(&I); + // Skip signed or pointer compares + if (ICmp->isSigned() || + !isa(ICmp->getOperand(0)->getType())) + continue; + + LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n"); - // Skip signed or pointer compares - if (CI.isSigned() || !isa(CI.getOperand(0)->getType())) - continue; + for (auto &Op : ICmp->operands()) { + if (auto *I = dyn_cast(Op)) { + EVT SrcVT = TLI->getValueType(DL, I->getType()); + if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT())) + break; - LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n"); + if (TLI->getTypeAction(ICmp->getContext(), SrcVT) != + TargetLowering::TypePromoteInteger) + break; - for (auto &Op : CI.operands()) { - if (auto *I = dyn_cast(Op)) - MadeChange |= TryToPromote(I); + EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT); + MadeChange |= TryToPromote(I, PromotedVT.getSizeInBits()); + break; } } } @@ -1046,24 +977,22 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) { }); } if (MadeChange) - LLVM_DEBUG(dbgs() << "After ARMCodeGenPrepare: " << F << "\n"); + LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n"); return MadeChange; } -bool ARMCodeGenPrepare::doFinalization(Module &M) { +bool TypePromotion::doFinalization(Module &M) { delete Promoter; return false; } -INITIALIZE_PASS_BEGIN(ARMCodeGenPrepare, DEBUG_TYPE, - "ARM IR optimizations", false, false) -INITIALIZE_PASS_END(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations", - false, false) +INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) +INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) -char ARMCodeGenPrepare::ID = 0; -unsigned ARMCodeGenPrepare::TypeSize = 0; +char TypePromotion::ID = 0; +unsigned TypePromotion::TypeSize = 0; -FunctionPass *llvm::createARMCodeGenPreparePass() { - return new ARMCodeGenPrepare(); +FunctionPass *llvm::createTypePromotionPass() { + return new TypePromotion(); } diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index b868abf695823..41cbdf0355585 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -120,139 +120,14 @@ std::string EVT::getEVTString() const { + getVectorElementType().getEVTString(); if (isInteger()) return "i" + utostr(getSizeInBits()); + if (isFloatingPoint()) + return "f" + utostr(getSizeInBits()); llvm_unreachable("Invalid EVT!"); - case MVT::i1: return "i1"; - case MVT::i8: return "i8"; - case MVT::i16: return "i16"; - case MVT::i32: return "i32"; - case MVT::i64: return "i64"; - case MVT::i128: return "i128"; - case MVT::f16: return "f16"; - case MVT::f32: return "f32"; - case MVT::f64: return "f64"; - case MVT::f80: return "f80"; - case MVT::f128: return "f128"; case MVT::ppcf128: return "ppcf128"; case MVT::isVoid: return "isVoid"; case MVT::Other: return "ch"; case MVT::Glue: return "glue"; case MVT::x86mmx: return "x86mmx"; - case MVT::v1i1: return "v1i1"; - case MVT::v2i1: return "v2i1"; - case MVT::v4i1: return "v4i1"; - case MVT::v8i1: return "v8i1"; - case MVT::v16i1: return "v16i1"; - case MVT::v32i1: return "v32i1"; - case MVT::v64i1: return "v64i1"; - case MVT::v128i1: return "v128i1"; - case MVT::v256i1: return "v256i1"; - case MVT::v512i1: return "v512i1"; - case MVT::v1024i1: return "v1024i1"; - case MVT::v1i8: return "v1i8"; 
- case MVT::v2i8: return "v2i8"; - case MVT::v4i8: return "v4i8"; - case MVT::v8i8: return "v8i8"; - case MVT::v16i8: return "v16i8"; - case MVT::v32i8: return "v32i8"; - case MVT::v64i8: return "v64i8"; - case MVT::v128i8: return "v128i8"; - case MVT::v256i8: return "v256i8"; - case MVT::v1i16: return "v1i16"; - case MVT::v2i16: return "v2i16"; - case MVT::v3i16: return "v3i16"; - case MVT::v4i16: return "v4i16"; - case MVT::v8i16: return "v8i16"; - case MVT::v16i16: return "v16i16"; - case MVT::v32i16: return "v32i16"; - case MVT::v64i16: return "v64i16"; - case MVT::v128i16: return "v128i16"; - case MVT::v1i32: return "v1i32"; - case MVT::v2i32: return "v2i32"; - case MVT::v3i32: return "v3i32"; - case MVT::v4i32: return "v4i32"; - case MVT::v5i32: return "v5i32"; - case MVT::v8i32: return "v8i32"; - case MVT::v16i32: return "v16i32"; - case MVT::v32i32: return "v32i32"; - case MVT::v64i32: return "v64i32"; - case MVT::v128i32: return "v128i32"; - case MVT::v256i32: return "v256i32"; - case MVT::v512i32: return "v512i32"; - case MVT::v1024i32:return "v1024i32"; - case MVT::v2048i32:return "v2048i32"; - case MVT::v1i64: return "v1i64"; - case MVT::v2i64: return "v2i64"; - case MVT::v4i64: return "v4i64"; - case MVT::v8i64: return "v8i64"; - case MVT::v16i64: return "v16i64"; - case MVT::v32i64: return "v32i64"; - case MVT::v1i128: return "v1i128"; - case MVT::v1f32: return "v1f32"; - case MVT::v2f32: return "v2f32"; - case MVT::v2f16: return "v2f16"; - case MVT::v3f16: return "v3f16"; - case MVT::v4f16: return "v4f16"; - case MVT::v8f16: return "v8f16"; - case MVT::v16f16: return "v16f16"; - case MVT::v32f16: return "v32f16"; - case MVT::v3f32: return "v3f32"; - case MVT::v4f32: return "v4f32"; - case MVT::v5f32: return "v5f32"; - case MVT::v8f32: return "v8f32"; - case MVT::v16f32: return "v16f32"; - case MVT::v32f32: return "v32f32"; - case MVT::v64f32: return "v64f32"; - case MVT::v128f32: return "v128f32"; - case MVT::v256f32: return "v256f32"; - case MVT::v512f32: return "v512f32"; - case MVT::v1024f32:return "v1024f32"; - case MVT::v2048f32:return "v2048f32"; - case MVT::v1f64: return "v1f64"; - case MVT::v2f64: return "v2f64"; - case MVT::v4f64: return "v4f64"; - case MVT::v8f64: return "v8f64"; - case MVT::nxv1i1: return "nxv1i1"; - case MVT::nxv2i1: return "nxv2i1"; - case MVT::nxv4i1: return "nxv4i1"; - case MVT::nxv8i1: return "nxv8i1"; - case MVT::nxv16i1: return "nxv16i1"; - case MVT::nxv32i1: return "nxv32i1"; - case MVT::nxv1i8: return "nxv1i8"; - case MVT::nxv2i8: return "nxv2i8"; - case MVT::nxv4i8: return "nxv4i8"; - case MVT::nxv8i8: return "nxv8i8"; - case MVT::nxv16i8: return "nxv16i8"; - case MVT::nxv32i8: return "nxv32i8"; - case MVT::nxv1i16: return "nxv1i16"; - case MVT::nxv2i16: return "nxv2i16"; - case MVT::nxv4i16: return "nxv4i16"; - case MVT::nxv8i16: return "nxv8i16"; - case MVT::nxv16i16:return "nxv16i16"; - case MVT::nxv32i16:return "nxv32i16"; - case MVT::nxv1i32: return "nxv1i32"; - case MVT::nxv2i32: return "nxv2i32"; - case MVT::nxv4i32: return "nxv4i32"; - case MVT::nxv8i32: return "nxv8i32"; - case MVT::nxv16i32:return "nxv16i32"; - case MVT::nxv32i32:return "nxv32i32"; - case MVT::nxv1i64: return "nxv1i64"; - case MVT::nxv2i64: return "nxv2i64"; - case MVT::nxv4i64: return "nxv4i64"; - case MVT::nxv8i64: return "nxv8i64"; - case MVT::nxv16i64:return "nxv16i64"; - case MVT::nxv32i64:return "nxv32i64"; - case MVT::nxv2f16: return "nxv2f16"; - case MVT::nxv4f16: return "nxv4f16"; - case MVT::nxv8f16: return "nxv8f16"; - case MVT::nxv1f32: return 
"nxv1f32"; - case MVT::nxv2f32: return "nxv2f32"; - case MVT::nxv4f32: return "nxv4f32"; - case MVT::nxv8f32: return "nxv8f32"; - case MVT::nxv16f32:return "nxv16f32"; - case MVT::nxv1f64: return "nxv1f64"; - case MVT::nxv2f64: return "nxv2f64"; - case MVT::nxv4f64: return "nxv4f64"; - case MVT::nxv8f64: return "nxv8f64"; case MVT::Metadata:return "Metadata"; case MVT::Untyped: return "Untyped"; case MVT::exnref : return "exnref"; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 4e70e232a9b5e..b268d2e6aef52 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -288,6 +288,7 @@ static void dumpRnglistsSection( static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts, DWARFDataExtractor Data, const MCRegisterInfo *MRI, + const DWARFObject &Obj, Optional DumpOffset) { uint64_t Offset = 0; @@ -306,13 +307,13 @@ static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts, if (DumpOffset) { if (DumpOffset >= Offset && DumpOffset < EndOffset) { Offset = *DumpOffset; - Loc.dumpLocationList(&Offset, OS, /*BaseAddr=*/None, MRI, nullptr, + Loc.dumpLocationList(&Offset, OS, /*BaseAddr=*/None, MRI, Obj, nullptr, DumpOpts, /*Indent=*/0); OS << "\n"; return; } } else { - Loc.dumpRange(Offset, EndOffset - Offset, OS, MRI, DumpOpts); + Loc.dumpRange(Offset, EndOffset - Offset, OS, MRI, Obj, DumpOpts); } Offset = EndOffset; } @@ -394,21 +395,21 @@ void DWARFContext::dump( if (const auto *Off = shouldDump(Explicit, ".debug_loc", DIDT_ID_DebugLoc, DObj->getLocSection().Data)) { - getDebugLoc()->dump(OS, getRegisterInfo(), LLDumpOpts, *Off); + getDebugLoc()->dump(OS, getRegisterInfo(), *DObj, LLDumpOpts, *Off); } if (const auto *Off = shouldDump(Explicit, ".debug_loclists", DIDT_ID_DebugLoclists, DObj->getLoclistsSection().Data)) { DWARFDataExtractor Data(*DObj, DObj->getLoclistsSection(), isLittleEndian(), 0); - dumpLoclistsSection(OS, LLDumpOpts, Data, getRegisterInfo(), *Off); + dumpLoclistsSection(OS, LLDumpOpts, Data, getRegisterInfo(), *DObj, *Off); } if (const auto *Off = shouldDump(ExplicitDWO, ".debug_loclists.dwo", DIDT_ID_DebugLoclists, DObj->getLoclistsDWOSection().Data)) { DWARFDataExtractor Data(*DObj, DObj->getLoclistsDWOSection(), isLittleEndian(), 0); - dumpLoclistsSection(OS, LLDumpOpts, Data, getRegisterInfo(), *Off); + dumpLoclistsSection(OS, LLDumpOpts, Data, getRegisterInfo(), *DObj, *Off); } if (const auto *Off = @@ -420,11 +421,11 @@ void DWARFContext::dump( if (*Off) { uint64_t Offset = **Off; Loc.dumpLocationList(&Offset, OS, - /*BaseAddr=*/None, getRegisterInfo(), nullptr, + /*BaseAddr=*/None, getRegisterInfo(), *DObj, nullptr, LLDumpOpts, /*Indent=*/0); OS << "\n"; } else { - Loc.dumpRange(0, Data.getData().size(), OS, getRegisterInfo(), + Loc.dumpRange(0, Data.getData().size(), OS, getRegisterInfo(), *DObj, LLDumpOpts); } } @@ -441,6 +442,9 @@ void DWARFContext::dump( if (Explicit || !getDebugMacro()->empty()) { OS << "\n.debug_macinfo contents:\n"; getDebugMacro()->dump(OS); + } else if (ExplicitDWO || !getDebugMacroDWO()->empty()) { + OS << "\n.debug_macinfo.dwo contents:\n"; + getDebugMacroDWO()->dump(OS); } } @@ -797,6 +801,17 @@ const DWARFDebugFrame *DWARFContext::getEHFrame() { return DebugFrame.get(); } +const DWARFDebugMacro *DWARFContext::getDebugMacroDWO() { + if (MacroDWO) + return MacroDWO.get(); + + DataExtractor MacinfoDWOData(DObj->getMacinfoDWOSection(), isLittleEndian(), + 0); + MacroDWO.reset(new DWARFDebugMacro()); + 
MacroDWO->parse(MacinfoDWOData); + return MacroDWO.get(); +} + const DWARFDebugMacro *DWARFContext::getDebugMacro() { if (Macro) return Macro.get(); @@ -1500,6 +1515,7 @@ class DWARFObjInMemory final : public DWARFObject { StringRef ArangesSection; StringRef StrSection; StringRef MacinfoSection; + StringRef MacinfoDWOSection; StringRef AbbrevDWOSection; StringRef StrDWOSection; StringRef CUIndexSection; @@ -1519,6 +1535,7 @@ class DWARFObjInMemory final : public DWARFObject { .Case("debug_aranges", &ArangesSection) .Case("debug_str", &StrSection) .Case("debug_macinfo", &MacinfoSection) + .Case("debug_macinfo.dwo", &MacinfoDWOSection) .Case("debug_abbrev.dwo", &AbbrevDWOSection) .Case("debug_str.dwo", &StrDWOSection) .Case("debug_cu_index", &CUIndexSection) @@ -1845,6 +1862,7 @@ class DWARFObjInMemory final : public DWARFObject { return RnglistsSection; } StringRef getMacinfoSection() const override { return MacinfoSection; } + StringRef getMacinfoDWOSection() const override { return MacinfoDWOSection; } const DWARFSection &getPubnamesSection() const override { return PubnamesSection; } const DWARFSection &getPubtypesSection() const override { return PubtypesSection; } const DWARFSection &getGnuPubnamesSection() const override { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index baa35eb813cf5..8aed9ab653a16 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -57,6 +57,17 @@ DWARFLocationInterpreter::Interpret(const DWARFLocationEntry &E) { return createResolverError(E.Value0, E.Kind); return None; } + case dwarf::DW_LLE_startx_endx: { + Optional LowPC = LookupAddr(E.Value0); + if (!LowPC) + return createResolverError(E.Value0, E.Kind); + Optional HighPC = LookupAddr(E.Value1); + if (!HighPC) + return createResolverError(E.Value1, E.Kind); + return DWARFLocationExpression{ + DWARFAddressRange{LowPC->Address, HighPC->Address, LowPC->SectionIndex}, + E.Loc}; + } case dwarf::DW_LLE_startx_length: { Optional LowPC = LookupAddr(E.Value0); if (!LowPC) @@ -66,23 +77,29 @@ DWARFLocationInterpreter::Interpret(const DWARFLocationEntry &E) { LowPC->SectionIndex}, E.Loc}; } - case dwarf::DW_LLE_offset_pair: + case dwarf::DW_LLE_offset_pair: { if (!Base) { return createStringError( inconvertibleErrorCode(), "Unable to resolve DW_LLE_offset_pair: base address unknown"); } - return DWARFLocationExpression{DWARFAddressRange{Base->Address + E.Value0, - Base->Address + E.Value1, - Base->SectionIndex}, - E.Loc}; + DWARFAddressRange Range{Base->Address + E.Value0, Base->Address + E.Value1, + Base->SectionIndex}; + if (Range.SectionIndex == SectionedAddress::UndefSection) + Range.SectionIndex = E.SectionIndex; + return DWARFLocationExpression{Range, E.Loc}; + } + case dwarf::DW_LLE_default_location: + return DWARFLocationExpression{None, E.Loc}; case dwarf::DW_LLE_base_address: - Base = SectionedAddress{E.Value0, SectionedAddress::UndefSection}; + Base = SectionedAddress{E.Value0, E.SectionIndex}; return None; + case dwarf::DW_LLE_start_end: + return DWARFLocationExpression{ + DWARFAddressRange{E.Value0, E.Value1, E.SectionIndex}, E.Loc}; case dwarf::DW_LLE_start_length: return DWARFLocationExpression{ - DWARFAddressRange{E.Value0, E.Value0 + E.Value1, - SectionedAddress::UndefSection}, + DWARFAddressRange{E.Value0, E.Value0 + E.Value1, E.SectionIndex}, E.Loc}; default: llvm_unreachable("unreachable locations list kind"); @@ -104,7 +121,8 @@ static void dumpExpression(raw_ostream &OS, ArrayRef Data, 
bool DWARFLocationTable::dumpLocationList(uint64_t *Offset, raw_ostream &OS, Optional BaseAddr, const MCRegisterInfo *MRI, - DWARFUnit *U, DIDumpOptions DumpOpts, + const DWARFObject &Obj, DWARFUnit *U, + DIDumpOptions DumpOpts, unsigned Indent) const { DWARFLocationInterpreter Interp( BaseAddr, [U](uint32_t Index) -> Optional { @@ -116,7 +134,7 @@ bool DWARFLocationTable::dumpLocationList(uint64_t *Offset, raw_ostream &OS, Error E = visitLocationList(Offset, [&](const DWARFLocationEntry &E) { Expected> Loc = Interp.Interpret(E); if (!Loc || DumpOpts.DisplayRawContents) - dumpRawEntry(E, OS, Indent); + dumpRawEntry(E, OS, Indent, DumpOpts, Obj); if (Loc && *Loc) { OS << "\n"; OS.indent(Indent); @@ -125,10 +143,10 @@ bool DWARFLocationTable::dumpLocationList(uint64_t *Offset, raw_ostream &OS, DIDumpOptions RangeDumpOpts(DumpOpts); RangeDumpOpts.DisplayRawContents = false; - const DWARFObject *Obj = nullptr; - if (U) - Obj = &U->getContext().getDWARFObj(); - Loc.get()->Range->dump(OS, Data.getAddressSize(), RangeDumpOpts, Obj); + if (Loc.get()->Range) + Loc.get()->Range->dump(OS, Data.getAddressSize(), RangeDumpOpts, &Obj); + else + OS << ""; } if (!Loc) consumeError(Loc.takeError()); @@ -167,12 +185,12 @@ Error DWARFLocationTable::visitAbsoluteLocationList( } void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI, - DIDumpOptions DumpOpts, + const DWARFObject &Obj, DIDumpOptions DumpOpts, Optional DumpOffset) const { auto BaseAddr = None; unsigned Indent = 12; if (DumpOffset) { - dumpLocationList(&*DumpOffset, OS, BaseAddr, MRI, nullptr, DumpOpts, + dumpLocationList(&*DumpOffset, OS, BaseAddr, MRI, Obj, nullptr, DumpOpts, Indent); } else { uint64_t Offset = 0; @@ -182,7 +200,7 @@ void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI, OS << Separator; Separator = "\n"; - CanContinue = dumpLocationList(&Offset, OS, BaseAddr, MRI, nullptr, + CanContinue = dumpLocationList(&Offset, OS, BaseAddr, MRI, Obj, nullptr, DumpOpts, Indent); OS << '\n'; } @@ -194,8 +212,9 @@ Error DWARFDebugLoc::visitLocationList( function_ref Callback) const { DataExtractor::Cursor C(*Offset); while (true) { + uint64_t SectionIndex; uint64_t Value0 = Data.getRelocatedAddress(C); - uint64_t Value1 = Data.getRelocatedAddress(C); + uint64_t Value1 = Data.getRelocatedAddress(C, &SectionIndex); DWARFLocationEntry E; @@ -208,10 +227,12 @@ Error DWARFDebugLoc::visitLocationList( } else if (Value0 == (Data.getAddressSize() == 4 ? -1U : -1ULL)) { E.Kind = dwarf::DW_LLE_base_address; E.Value0 = Value1; + E.SectionIndex = SectionIndex; } else { E.Kind = dwarf::DW_LLE_offset_pair; E.Value0 = Value0; E.Value1 = Value1; + E.SectionIndex = SectionIndex; unsigned Bytes = Data.getU16(C); // A single location description describing the location of the object... 
Data.getU8(C, E.Loc, Bytes); @@ -227,7 +248,9 @@ Error DWARFDebugLoc::visitLocationList( } void DWARFDebugLoc::dumpRawEntry(const DWARFLocationEntry &Entry, - raw_ostream &OS, unsigned Indent) const { + raw_ostream &OS, unsigned Indent, + DIDumpOptions DumpOpts, + const DWARFObject &Obj) const { uint64_t Value0, Value1; switch (Entry.Kind) { case dwarf::DW_LLE_base_address: @@ -248,6 +271,7 @@ void DWARFDebugLoc::dumpRawEntry(const DWARFLocationEntry &Entry, OS.indent(Indent); OS << '(' << format_hex(Value0, 2 + Data.getAddressSize() * 2) << ", " << format_hex(Value1, 2 + Data.getAddressSize() * 2) << ')'; + DWARFFormValue::dumpAddressSection(Obj, OS, DumpOpts, Entry.SectionIndex); } Error DWARFDebugLoclists::visitLocationList( @@ -264,6 +288,10 @@ Error DWARFDebugLoclists::visitLocationList( case dwarf::DW_LLE_base_addressx: E.Value0 = Data.getULEB128(C); break; + case dwarf::DW_LLE_startx_endx: + E.Value0 = Data.getULEB128(C); + E.Value1 = Data.getULEB128(C); + break; case dwarf::DW_LLE_startx_length: E.Value0 = Data.getULEB128(C); // Pre-DWARF 5 has different interpretation of the length field. We have @@ -276,17 +304,21 @@ Error DWARFDebugLoclists::visitLocationList( case dwarf::DW_LLE_offset_pair: E.Value0 = Data.getULEB128(C); E.Value1 = Data.getULEB128(C); + E.SectionIndex = SectionedAddress::UndefSection; + break; + case dwarf::DW_LLE_default_location: break; case dwarf::DW_LLE_base_address: - E.Value0 = Data.getRelocatedAddress(C); + E.Value0 = Data.getRelocatedAddress(C, &E.SectionIndex); + break; + case dwarf::DW_LLE_start_end: + E.Value0 = Data.getRelocatedAddress(C, &E.SectionIndex); + E.Value1 = Data.getRelocatedAddress(C); break; case dwarf::DW_LLE_start_length: - E.Value0 = Data.getRelocatedAddress(C); + E.Value0 = Data.getRelocatedAddress(C, &E.SectionIndex); E.Value1 = Data.getULEB128(C); break; - case dwarf::DW_LLE_startx_endx: - case dwarf::DW_LLE_default_location: - case dwarf::DW_LLE_start_end: default: cantFail(C.takeError()); return createStringError(errc::illegal_byte_sequence, @@ -310,7 +342,9 @@ Error DWARFDebugLoclists::visitLocationList( } void DWARFDebugLoclists::dumpRawEntry(const DWARFLocationEntry &Entry, - raw_ostream &OS, unsigned Indent) const { + raw_ostream &OS, unsigned Indent, + DIDumpOptions DumpOpts, + const DWARFObject &Obj) const { size_t MaxEncodingStringLength = 0; #define HANDLE_DW_LLE(ID, NAME) \ MaxEncodingStringLength = std::max(MaxEncodingStringLength, \ @@ -325,9 +359,14 @@ void DWARFDebugLoclists::dumpRawEntry(const DWARFLocationEntry &Entry, OS << format("%-*s(", MaxEncodingStringLength, EncodingString.data()); unsigned FieldSize = 2 + 2 * Data.getAddressSize(); switch (Entry.Kind) { + case dwarf::DW_LLE_end_of_list: + case dwarf::DW_LLE_default_location: + break; + case dwarf::DW_LLE_startx_endx: case dwarf::DW_LLE_startx_length: - case dwarf::DW_LLE_start_length: case dwarf::DW_LLE_offset_pair: + case dwarf::DW_LLE_start_end: + case dwarf::DW_LLE_start_length: OS << format_hex(Entry.Value0, FieldSize) << ", " << format_hex(Entry.Value1, FieldSize); break; @@ -335,14 +374,22 @@ void DWARFDebugLoclists::dumpRawEntry(const DWARFLocationEntry &Entry, case dwarf::DW_LLE_base_address: OS << format_hex(Entry.Value0, FieldSize); break; - case dwarf::DW_LLE_end_of_list: - break; } OS << ')'; + switch (Entry.Kind) { + case dwarf::DW_LLE_base_address: + case dwarf::DW_LLE_start_end: + case dwarf::DW_LLE_start_length: + DWARFFormValue::dumpAddressSection(Obj, OS, DumpOpts, Entry.SectionIndex); + break; + default: + break; + } } void 
DWARFDebugLoclists::dumpRange(uint64_t StartOffset, uint64_t Size, raw_ostream &OS, const MCRegisterInfo *MRI, + const DWARFObject &Obj, DIDumpOptions DumpOpts) { if (!Data.isValidOffsetForDataOfSize(StartOffset, Size)) { OS << "Invalid dump range\n"; @@ -355,8 +402,8 @@ void DWARFDebugLoclists::dumpRange(uint64_t StartOffset, uint64_t Size, OS << Separator; Separator = "\n"; - CanContinue = dumpLocationList(&Offset, OS, /*BaseAddr=*/None, MRI, nullptr, - DumpOpts, /*Indent=*/12); + CanContinue = dumpLocationList(&Offset, OS, /*BaseAddr=*/None, MRI, Obj, + nullptr, DumpOpts, /*Indent=*/12); OS << '\n'; } } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp index f6785b89e86d4..9ae4c5b73ebe9 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp @@ -114,12 +114,21 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint64_t End, DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges( llvm::Optional BaseAddr, DWARFUnit &U) const { + return getAbsoluteRanges(BaseAddr, [&](uint32_t Index) { + return U.getAddrOffsetSectionItem(Index); + }); +} + +DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges( + Optional BaseAddr, + function_ref(uint32_t)> + LookupPooledAddress) const { DWARFAddressRangesVector Res; for (const RangeListEntry &RLE : Entries) { if (RLE.EntryKind == dwarf::DW_RLE_end_of_list) break; if (RLE.EntryKind == dwarf::DW_RLE_base_addressx) { - BaseAddr = U.getAddrOffsetSectionItem(RLE.Value0); + BaseAddr = LookupPooledAddress(RLE.Value0); if (!BaseAddr) BaseAddr = {RLE.Value0, -1ULL}; continue; @@ -152,7 +161,7 @@ DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges( E.HighPC = E.LowPC + RLE.Value1; break; case dwarf::DW_RLE_startx_length: { - auto Start = U.getAddrOffsetSectionItem(RLE.Value0); + auto Start = LookupPooledAddress(RLE.Value0); if (!Start) Start = {0, -1ULL}; E.SectionIndex = Start->SectionIndex; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index cc3d021b0ddbd..4b86359c04e3f 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -91,12 +91,13 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue, FormValue.dump(OS, DumpOpts); if (auto LoclistOffset = U->getLoclistOffset(Offset)) - Offset = *LoclistOffset + U->getLocSectionBase(); + Offset = *LoclistOffset; else return; } U->getLocationTable().dumpLocationList(&Offset, OS, U->getBaseAddress(), - MRI, U, DumpOpts, Indent); + MRI, Ctx.getDWARFObj(), U, DumpOpts, + Indent); return; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index b662e88816f8a..4ccda628093c9 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -629,7 +629,7 @@ DWARFUnit::findRnglistFromOffset(uint64_t Offset) { Expected DWARFUnit::findRnglistFromIndex(uint32_t Index) { if (auto Offset = getRnglistOffset(Index)) - return findRnglistFromOffset(*Offset + RangeSectionBase); + return findRnglistFromOffset(*Offset); if (RngListTable) return createStringError(errc::invalid_argument, diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp index be79d9e637c14..cb076aed3aac4 100644 --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -284,6 +284,79 @@ bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, return 
!memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); } +template +Optional> getBuildID(const ELFFile *Obj) { + if (!Obj) + return {}; + auto PhdrsOrErr = Obj->program_headers(); + if (!PhdrsOrErr) { + consumeError(PhdrsOrErr.takeError()); + return {}; + } + for (const auto &P : *PhdrsOrErr) { + if (P.p_type != ELF::PT_NOTE) + continue; + Error Err = Error::success(); + for (const auto &N : Obj->notes(P, Err)) + if (N.getType() == ELF::NT_GNU_BUILD_ID && N.getName() == ELF::ELF_NOTE_GNU) + return N.getDesc(); + } + return {}; +} + +Optional> getBuildID(const ELFObjectFileBase *Obj) { + Optional> BuildID; + if (auto *O = dyn_cast>(Obj)) + BuildID = getBuildID(O->getELFFile()); + else if (auto *O = dyn_cast>(Obj)) + BuildID = getBuildID(O->getELFFile()); + else if (auto *O = dyn_cast>(Obj)) + BuildID = getBuildID(O->getELFFile()); + else if (auto *O = dyn_cast>(Obj)) + BuildID = getBuildID(O->getELFFile()); + else + llvm_unreachable("unsupported file format"); + return BuildID; +} + +bool findDebugBinary(const std::vector &DebugFileDirectory, + const ArrayRef BuildID, + std::string &Result) { + auto getDebugPath = [&](StringRef Directory) { + SmallString<128> Path{Directory}; + sys::path::append(Path, ".build-id", + llvm::toHex(BuildID[0], /*LowerCase=*/true), + llvm::toHex(BuildID.slice(1), /*LowerCase=*/true)); + Path += ".debug"; + return Path; + }; + if (DebugFileDirectory.empty()) { + SmallString<128> Path = getDebugPath( +#if defined(__NetBSD__) + // Try /usr/libdata/debug/.build-id/../... + "/usr/libdata/debug" +#else + // Try /usr/lib/debug/.build-id/../... + "/usr/lib/debug" +#endif + ); + if (llvm::sys::fs::exists(Path)) { + Result = Path.str(); + return true; + } + } else { + for (const auto &Directory : DebugFileDirectory) { + // Try /.build-id/../... 
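// A standalone illustration (local helpers, not llvm::toHex or sys::path)
// of the .build-id file layout getDebugPath composes above: the first byte
// of the GNU build ID names a subdirectory and the remaining bytes name
// the file, e.g. ID 0xAB 0xCD 0xEF maps to <dir>/.build-id/ab/cdef.debug.
// Callers check the ID has at least two bytes, as lookUpBuildIDObject does.
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

static std::string toLowerHex(const std::vector<uint8_t> &Bytes,
                              std::size_t Begin, std::size_t End) {
  static const char Digits[] = "0123456789abcdef";
  std::string S;
  for (std::size_t I = Begin; I < End; ++I) {
    S += Digits[Bytes[I] >> 4];
    S += Digits[Bytes[I] & 0xF];
  }
  return S;
}

std::string buildIDDebugPath(const std::string &Dir,
                             const std::vector<uint8_t> &ID) {
  // Mirrors getDebugPath: <Dir>/.build-id/<ID[0]>/<ID[1:]>.debug
  return Dir + "/.build-id/" + toLowerHex(ID, 0, 1) + "/" +
         toLowerHex(ID, 1, ID.size()) + ".debug";
}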
+ SmallString<128> Path = getDebugPath(Directory); + if (llvm::sys::fs::exists(Path)) { + Result = Path.str(); + return true; + } + } + } + return false; +} + } // end anonymous namespace ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, @@ -335,6 +408,25 @@ ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, return DbgObjOrErr.get(); } +ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, + const ELFObjectFileBase *Obj, + const std::string &ArchName) { + auto BuildID = getBuildID(Obj); + if (!BuildID) + return nullptr; + if (BuildID->size() < 2) + return nullptr; + std::string DebugBinaryPath; + if (!findDebugBinary(Opts.DebugFileDirectory, *BuildID, DebugBinaryPath)) + return nullptr; + auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); + if (!DbgObjOrErr) { + consumeError(DbgObjOrErr.takeError()); + return nullptr; + } + return DbgObjOrErr.get(); +} + Expected LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, const std::string &ArchName) { @@ -355,6 +447,8 @@ LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, if (auto MachObj = dyn_cast(Obj)) DbgObj = lookUpDsymFile(Path, MachObj, ArchName); + else if (auto ELFObj = dyn_cast(Obj)) + DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName); if (!DbgObj) DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); if (!DbgObj) diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index 9df79670d9fba..6c924f8895776 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -266,6 +266,16 @@ void LinkGraph::dump(raw_ostream &OS, << "\n"; } +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LF) { + switch (LF) { + case SymbolLookupFlags::RequiredSymbol: + return OS << "RequiredSymbol"; + case SymbolLookupFlags::WeaklyReferencedSymbol: + return OS << "WeaklyReferencedSymbol"; + } + llvm_unreachable("Unrecognized lookup flags"); +} + void JITLinkAsyncLookupContinuation::anchor() {} JITLinkContext::~JITLinkContext() {} diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index 9707b9624d936..7b594fd2c0ea9 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -257,25 +257,35 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) { return Error::success(); } -DenseSet JITLinkerBase::getExternalSymbolNames() const { +JITLinkContext::LookupMap JITLinkerBase::getExternalSymbolNames() const { // Identify unresolved external symbols. - DenseSet UnresolvedExternals; + JITLinkContext::LookupMap UnresolvedExternals; for (auto *Sym : G->external_symbols()) { assert(Sym->getAddress() == 0 && "External has already been assigned an address"); assert(Sym->getName() != StringRef() && Sym->getName() != "" && "Externals must be named"); - UnresolvedExternals.insert(Sym->getName()); + SymbolLookupFlags LookupFlags = + Sym->getLinkage() == Linkage::Weak + ? 
SymbolLookupFlags::WeaklyReferencedSymbol + : SymbolLookupFlags::RequiredSymbol; + UnresolvedExternals[Sym->getName()] = LookupFlags; } return UnresolvedExternals; } void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { for (auto *Sym : G->external_symbols()) { + assert(Sym->getOffset() == 0 && + "External symbol is not at the start of its addressable block"); assert(Sym->getAddress() == 0 && "Symbol already resolved"); assert(!Sym->isDefined() && "Symbol being resolved is already defined"); - assert(Result.count(Sym->getName()) && "Missing resolution for symbol"); - Sym->getAddressable().setAddress(Result[Sym->getName()].getAddress()); + auto ResultI = Result.find(Sym->getName()); + if (ResultI != Result.end()) + Sym->getAddressable().setAddress(ResultI->second.getAddress()); + else + assert(Sym->getLinkage() == Linkage::Weak && + "Failed to resolve non-weak reference"); } LLVM_DEBUG({ @@ -285,8 +295,11 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { << formatv("{0:x16}", Sym->getAddress()) << "\n"; }); assert(llvm::all_of(G->external_symbols(), - [](Symbol *Sym) { return Sym->getAddress() != 0; }) && - "All symbols should have been resolved by this point"); + [](Symbol *Sym) { + return Sym->getAddress() != 0 || + Sym->getLinkage() == Linkage::Weak; + }) && + "All strong external symbols should have been resolved by now"); } void JITLinkerBase::deallocateAndBailOut(Error Err) { diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h index 07dee6cee2002..d5687b7afc967 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h @@ -106,7 +106,7 @@ class JITLinkerBase { SegmentLayoutMap layOutBlocks(); Error allocateSegments(const SegmentLayoutMap &Layout); - DenseSet getExternalSymbolNames() const; + JITLinkContext::LookupMap getExternalSymbolNames() const; void applyLookupResult(AsyncLookupResult LR); void deallocateAndBailOut(Error Err); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp index c1dc138ee7024..1881bd0b287e0 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp @@ -321,7 +321,9 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { return make_error("Anonymous external symbol at " "index " + Twine(KV.first)); - NSym.GraphSymbol = &G->addExternalSymbol(*NSym.Name, 0); + NSym.GraphSymbol = &G->addExternalSymbol( + *NSym.Name, 0, + NSym.Desc & MachO::N_WEAK_REF ? 
Linkage::Weak : Linkage::Strong); } break; case MachO::N_ABS: diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index 9dbfb6556e317..69ec72aae2928 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -252,7 +252,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { TargetSymbol = TargetSymbolOrErr->GraphSymbol; else return TargetSymbolOrErr.takeError(); - Addend = *(const ulittle32_t *)FixupContent; + Addend = *(const little32_t *)FixupContent; break; case Pointer32: if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) @@ -284,12 +284,12 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { TargetSymbol = TargetSymbolOrErr->GraphSymbol; else return TargetSymbolOrErr.takeError(); - Addend = *(const ulittle32_t *)FixupContent + + Addend = *(const little32_t *)FixupContent + (1 << (*Kind - PCRel32Minus1)); break; case PCRel32Anon: { JITTargetAddress TargetAddress = - FixupAddress + 4 + *(const ulittle32_t *)FixupContent; + FixupAddress + 4 + *(const little32_t *)FixupContent; if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) TargetSymbol = &*TargetSymbolOrErr; else @@ -303,7 +303,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { JITTargetAddress Delta = static_cast(1ULL << (*Kind - PCRel32Minus1Anon)); JITTargetAddress TargetAddress = - FixupAddress + 4 + Delta + *(const ulittle32_t *)FixupContent; + FixupAddress + 4 + Delta + *(const little32_t *)FixupContent; if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) TargetSymbol = &*TargetSymbolOrErr; else diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp index 75ddbc30445d2..f26835ff8a085 100644 --- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp @@ -162,7 +162,8 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R, return; } - R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), true)); + R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), + JITDylibLookupFlags::MatchAllSymbols)); R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(), std::move(Callables), AliaseeImpls)); } @@ -171,18 +172,22 @@ CompileOnDemandLayer::PerDylibResources & CompileOnDemandLayer::getPerDylibResources(JITDylib &TargetD) { auto I = DylibResources.find(&TargetD); if (I == DylibResources.end()) { - auto &ImplD = getExecutionSession().createJITDylib( - TargetD.getName() + ".impl", false); - TargetD.withSearchOrderDo([&](const JITDylibSearchList &TargetSearchOrder) { - auto NewSearchOrder = TargetSearchOrder; - assert(!NewSearchOrder.empty() && - NewSearchOrder.front().first == &TargetD && - NewSearchOrder.front().second == true && - "TargetD must be at the front of its own search order and match " - "non-exported symbol"); - NewSearchOrder.insert(std::next(NewSearchOrder.begin()), {&ImplD, true}); - ImplD.setSearchOrder(std::move(NewSearchOrder), false); - }); + auto &ImplD = + getExecutionSession().createJITDylib(TargetD.getName() + ".impl"); + TargetD.withSearchOrderDo( + [&](const JITDylibSearchOrder &TargetSearchOrder) { + auto NewSearchOrder = TargetSearchOrder; + assert( + !NewSearchOrder.empty() && + NewSearchOrder.front().first == &TargetD && + NewSearchOrder.front().second == + JITDylibLookupFlags::MatchAllSymbols && + "TargetD must be at 
the front of its own search order and match "
+ "non-exported symbol");
+ NewSearchOrder.insert(std::next(NewSearchOrder.begin()),
+ {&ImplD, JITDylibLookupFlags::MatchAllSymbols});
+ ImplD.setSearchOrder(std::move(NewSearchOrder), false);
+ });
PerDylibResources PDR(ImplD, BuildIndirectStubsManager());
I = DylibResources.insert(std::make_pair(&TargetD, std::move(PDR))).first;
}
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 9e024ba0f10f8..63ef889dae464 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/Core.h"
+
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/IR/Mangler.h"
@@ -77,16 +79,19 @@ bool flagsMatchCLOpts(const JITSymbolFlags &Flags) {
#endif // NDEBUG
}

-// Prints a set of items, filtered by an user-supplied predicate.
-template >
-class SetPrinter {
+// Prints a sequence of items, filtered by a user-supplied predicate.
+template >
+class SequencePrinter {
public:
- SetPrinter(const Set &S, Pred ShouldPrint = Pred())
- : S(S), ShouldPrint(std::move(ShouldPrint)) {}
+ SequencePrinter(const Sequence &S, char OpenSeq, char CloseSeq,
+ Pred ShouldPrint = Pred())
+ : S(S), OpenSeq(OpenSeq), CloseSeq(CloseSeq),
+ ShouldPrint(std::move(ShouldPrint)) {}

void printTo(llvm::raw_ostream &OS) const {
bool PrintComma = false;
- OS << "{";
+ OS << OpenSeq;
for (auto &E : S) {
if (ShouldPrint(E)) {
if (PrintComma)
@@ -95,23 +100,26 @@ class SetPrinter {
OS << ',';
OS << ' ' << E;
PrintComma = true;
}
}
- OS << " }";
+ OS << ' ' << CloseSeq;
}

private:
- const Set &S;
+ const Sequence &S;
+ char OpenSeq;
+ char CloseSeq;
mutable Pred ShouldPrint;
};

-template
-SetPrinter printSet(const Set &S, Pred P = Pred()) {
- return SetPrinter(S, std::move(P));
+template
+SequencePrinter printSequence(const Sequence &S, char OpenSeq,
+ char CloseSeq, Pred P = Pred()) {
+ return SequencePrinter(S, OpenSeq, CloseSeq, std::move(P));
}

-// Render a SetPrinter by delegating to its printTo method.
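// A minimal std::ostream analogue (not the llvm::raw_ostream version in
// this patch) of the SequencePrinter/printSequence pattern, showing how
// the configurable delimiters let sets render as "{ ... }" and vectors
// as "[ ... ]" through a single operator<<.
#include <iostream>
#include <vector>

template <typename Sequence, typename Pred>
struct SeqPrinter {
  const Sequence &S;
  char Open, Close;
  Pred ShouldPrint;
};

template <typename Sequence, typename Pred>
std::ostream &operator<<(std::ostream &OS,
                         const SeqPrinter<Sequence, Pred> &P) {
  OS << P.Open;
  bool Comma = false;
  for (const auto &E : P.S) {
    if (!P.ShouldPrint(E))
      continue; // filtered out by the user-supplied predicate
    if (Comma)
      OS << ',';
    OS << ' ' << E;
    Comma = true;
  }
  return OS << ' ' << P.Close;
}

template <typename Sequence, typename Pred>
SeqPrinter<Sequence, Pred> printSeq(const Sequence &S, char Open, char Close,
                                    Pred P) {
  return {S, Open, Close, P};
}

int main() {
  std::vector<int> V{1, 2, 3, 4};
  // Prints "[ 1, 2, 3 ]": vector delimiters plus a filtering predicate.
  std::cout << printSeq(V, '[', ']', [](int E) { return E < 4; }) << '\n';
}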
+template llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const SetPrinter &Printer) { + const SequencePrinter &Printer) { Printer.printTo(OS); return OS; } @@ -147,7 +155,11 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym) { } raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols) { - return OS << printSet(Symbols, PrintAll()); + return OS << printSequence(Symbols, '{', '}', PrintAll()); +} + +raw_ostream &operator<<(raw_ostream &OS, const SymbolNameVector &Symbols) { + return OS << printSequence(Symbols, '[', ']', PrintAll()); } raw_ostream &operator<<(raw_ostream &OS, const JITSymbolFlags &Flags) { @@ -182,11 +194,13 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolMap::value_type &KV) { } raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap &SymbolFlags) { - return OS << printSet(SymbolFlags, PrintSymbolFlagsMapElemsMatchingCLOpts()); + return OS << printSequence(SymbolFlags, '{', '}', + PrintSymbolFlagsMapElemsMatchingCLOpts()); } raw_ostream &operator<<(raw_ostream &OS, const SymbolMap &Symbols) { - return OS << printSet(Symbols, PrintSymbolMapElemsMatchingCLOpts()); + return OS << printSequence(Symbols, '{', '}', + PrintSymbolMapElemsMatchingCLOpts()); } raw_ostream &operator<<(raw_ostream &OS, @@ -195,7 +209,8 @@ raw_ostream &operator<<(raw_ostream &OS, } raw_ostream &operator<<(raw_ostream &OS, const SymbolDependenceMap &Deps) { - return OS << printSet(Deps, PrintAll()); + return OS << printSequence(Deps, '{', '}', + PrintAll()); } raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU) { @@ -205,16 +220,59 @@ raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU) { return OS << ")"; } -raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs) { +raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K) { + switch (K) { + case LookupKind::Static: + return OS << "Static"; + case LookupKind::DLSym: + return OS << "DLSym"; + } + llvm_unreachable("Invalid lookup kind"); +} + +raw_ostream &operator<<(raw_ostream &OS, + const JITDylibLookupFlags &JDLookupFlags) { + switch (JDLookupFlags) { + case JITDylibLookupFlags::MatchExportedSymbolsOnly: + return OS << "MatchExportedSymbolsOnly"; + case JITDylibLookupFlags::MatchAllSymbols: + return OS << "MatchAllSymbols"; + } + llvm_unreachable("Invalid JITDylib lookup flags"); +} + +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LookupFlags) { + switch (LookupFlags) { + case SymbolLookupFlags::RequiredSymbol: + return OS << "RequiredSymbol"; + case SymbolLookupFlags::WeaklyReferencedSymbol: + return OS << "WeaklyReferencedSymbol"; + } + llvm_unreachable("Invalid symbol lookup flags"); +} + +raw_ostream &operator<<(raw_ostream &OS, + const SymbolLookupSet::value_type &KV) { + return OS << "(" << KV.first << ", " << KV.second << ")"; +} + +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupSet &LookupSet) { + return OS << printSequence(LookupSet, '{', '}', + PrintAll()); +} + +raw_ostream &operator<<(raw_ostream &OS, + const JITDylibSearchOrder &SearchOrder) { OS << "["; - if (!JDs.empty()) { - assert(JDs.front().first && "JITDylibList entries must not be null"); - OS << " (\"" << JDs.front().first->getName() << "\", " - << (JDs.front().second ? 
"true" : "false") << ")"; - for (auto &KV : make_range(std::next(JDs.begin()), JDs.end())) { + if (!SearchOrder.empty()) { + assert(SearchOrder.front().first && + "JITDylibList entries must not be null"); + OS << " (\"" << SearchOrder.front().first->getName() << "\", " + << SearchOrder.begin()->second << ")"; + for (auto &KV : + make_range(std::next(SearchOrder.begin(), 1), SearchOrder.end())) { assert(KV.first && "JITDylibList entries must not be null"); - OS << ", (\"" << KV.first->getName() << "\", " - << (KV.second ? "true" : "false") << ")"; + OS << ", (\"" << KV.first->getName() << "\", " << KV.second << ")"; } } OS << " ]"; @@ -262,7 +320,13 @@ void FailedToMaterialize::log(raw_ostream &OS) const { OS << "Failed to materialize symbols: " << *Symbols; } -SymbolsNotFound::SymbolsNotFound(SymbolNameSet Symbols) +SymbolsNotFound::SymbolsNotFound(SymbolNameSet Symbols) { + for (auto &Sym : Symbols) + this->Symbols.push_back(Sym); + assert(!this->Symbols.empty() && "Can not fail to resolve an empty set"); +} + +SymbolsNotFound::SymbolsNotFound(SymbolNameVector Symbols) : Symbols(std::move(Symbols)) { assert(!this->Symbols.empty() && "Can not fail to resolve an empty set"); } @@ -289,7 +353,7 @@ void SymbolsCouldNotBeRemoved::log(raw_ostream &OS) const { } AsynchronousSymbolQuery::AsynchronousSymbolQuery( - const SymbolNameSet &Symbols, SymbolState RequiredState, + const SymbolLookupSet &Symbols, SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete) : NotifyComplete(std::move(NotifyComplete)), RequiredState(RequiredState) { assert(RequiredState >= SymbolState::Resolved && @@ -298,8 +362,8 @@ AsynchronousSymbolQuery::AsynchronousSymbolQuery( OutstandingSymbolsCount = Symbols.size(); - for (auto &S : Symbols) - ResolvedSymbols[S] = nullptr; + for (auto &KV : Symbols) + ResolvedSymbols[KV.first] = nullptr; } void AsynchronousSymbolQuery::notifySymbolMetRequiredState( @@ -511,10 +575,10 @@ AbsoluteSymbolsMaterializationUnit::extractFlags(const SymbolMap &Symbols) { } ReExportsMaterializationUnit::ReExportsMaterializationUnit( - JITDylib *SourceJD, bool MatchNonExported, SymbolAliasMap Aliases, - VModuleKey K) + JITDylib *SourceJD, JITDylibLookupFlags SourceJDLookupFlags, + SymbolAliasMap Aliases, VModuleKey K) : MaterializationUnit(extractFlags(Aliases), std::move(K)), - SourceJD(SourceJD), MatchNonExported(MatchNonExported), + SourceJD(SourceJD), SourceJDLookupFlags(SourceJDLookupFlags), Aliases(std::move(Aliases)) {} StringRef ReExportsMaterializationUnit::getName() const { @@ -551,7 +615,7 @@ void ReExportsMaterializationUnit::materialize( if (!Aliases.empty()) { if (SourceJD) - R.replace(reexports(*SourceJD, std::move(Aliases), MatchNonExported)); + R.replace(reexports(*SourceJD, std::move(Aliases), SourceJDLookupFlags)); else R.replace(symbolAliases(std::move(Aliases))); } @@ -572,11 +636,11 @@ void ReExportsMaterializationUnit::materialize( // be waitin on a symbol that it itself had to resolve. Usually this will just // involve one round and a single query. - std::vector>> + std::vector>> QueryInfos; while (!RequestedAliases.empty()) { SymbolNameSet ResponsibilitySymbols; - SymbolNameSet QuerySymbols; + SymbolLookupSet QuerySymbols; SymbolAliasMap QueryAliases; // Collect as many aliases as we can without including a chain. 
@@ -587,7 +651,7 @@ void ReExportsMaterializationUnit::materialize( continue; ResponsibilitySymbols.insert(KV.first); - QuerySymbols.insert(KV.second.Aliasee); + QuerySymbols.add(KV.second.Aliasee); QueryAliases[KV.first] = std::move(KV.second); } @@ -657,8 +721,9 @@ void ReExportsMaterializationUnit::materialize( } }; - ES.lookup(JITDylibSearchList({{&SrcJD, MatchNonExported}}), QuerySymbols, - SymbolState::Resolved, std::move(OnComplete), + ES.lookup(LookupKind::Static, + JITDylibSearchOrder({{&SrcJD, SourceJDLookupFlags}}), + QuerySymbols, SymbolState::Resolved, std::move(OnComplete), std::move(RegisterDependencies)); } } @@ -681,16 +746,16 @@ ReExportsMaterializationUnit::extractFlags(const SymbolAliasMap &Aliases) { Expected buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) { - auto Flags = SourceJD.lookupFlags(Symbols); + SymbolLookupSet LookupSet(Symbols); + auto Flags = SourceJD.lookupFlags( + LookupKind::Static, JITDylibLookupFlags::MatchAllSymbols, LookupSet); if (!Flags) return Flags.takeError(); - if (Flags->size() != Symbols.size()) { - SymbolNameSet Unresolved = Symbols; - for (auto &KV : *Flags) - Unresolved.erase(KV.first); - return make_error(std::move(Unresolved)); + if (!LookupSet.empty()) { + LookupSet.sortByName(); + return make_error(LookupSet.getSymbolNames()); } SymbolAliasMap Result; @@ -703,32 +768,32 @@ buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) { } ReexportsGenerator::ReexportsGenerator(JITDylib &SourceJD, - bool MatchNonExported, + JITDylibLookupFlags SourceJDLookupFlags, SymbolPredicate Allow) - : SourceJD(SourceJD), MatchNonExported(MatchNonExported), + : SourceJD(SourceJD), SourceJDLookupFlags(SourceJDLookupFlags), Allow(std::move(Allow)) {} -Expected -ReexportsGenerator::tryToGenerate(JITDylib &JD, const SymbolNameSet &Names) { - orc::SymbolNameSet Added; - orc::SymbolAliasMap AliasMap; - - auto Flags = SourceJD.lookupFlags(Names); +Error ReexportsGenerator::tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &LookupSet) { + assert(&JD != &SourceJD && "Cannot re-export from the same dylib"); + // Use lookupFlags to find the subset of symbols that match our lookup. + auto Flags = SourceJD.lookupFlags(K, JDLookupFlags, LookupSet); if (!Flags) return Flags.takeError(); - for (auto &KV : *Flags) { - if (Allow && !Allow(KV.first)) - continue; - AliasMap[KV.first] = SymbolAliasMapEntry(KV.first, KV.second); - Added.insert(KV.first); - } + // Create an alias map. + orc::SymbolAliasMap AliasMap; + for (auto &KV : *Flags) + if (!Allow || Allow(KV.first)) + AliasMap[KV.first] = SymbolAliasMapEntry(KV.first, KV.second); - if (!Added.empty()) - cantFail(JD.define(reexports(SourceJD, AliasMap, MatchNonExported))); + if (AliasMap.empty()) + return Error::success(); - return Added; + // Define the re-exports. 
+ return JD.define(reexports(SourceJD, AliasMap, SourceJDLookupFlags)); } JITDylib::DefinitionGenerator::~DefinitionGenerator() {} @@ -1252,41 +1317,41 @@ void JITDylib::notifyFailed(FailedSymbolsWorklist Worklist) { Q->handleFailed(make_error(FailedSymbolsMap)); } -void JITDylib::setSearchOrder(JITDylibSearchList NewSearchOrder, - bool SearchThisJITDylibFirst, - bool MatchNonExportedInThisDylib) { - if (SearchThisJITDylibFirst) { - if (NewSearchOrder.empty() || NewSearchOrder.front().first != this) - NewSearchOrder.insert(NewSearchOrder.begin(), - {this, MatchNonExportedInThisDylib}); - } - - ES.runSessionLocked([&]() { SearchOrder = std::move(NewSearchOrder); }); -} - -void JITDylib::addToSearchOrder(JITDylib &JD, bool MatchNonExported) { +void JITDylib::setSearchOrder(JITDylibSearchOrder NewSearchOrder, + bool SearchThisJITDylibFirst) { ES.runSessionLocked([&]() { - SearchOrder.push_back({&JD, MatchNonExported}); + if (SearchThisJITDylibFirst) { + SearchOrder.clear(); + if (NewSearchOrder.empty() || NewSearchOrder.front().first != this) + SearchOrder.push_back( + std::make_pair(this, JITDylibLookupFlags::MatchAllSymbols)); + SearchOrder.insert(SearchOrder.end(), NewSearchOrder.begin(), + NewSearchOrder.end()); + } else + SearchOrder = std::move(NewSearchOrder); }); } +void JITDylib::addToSearchOrder(JITDylib &JD, + JITDylibLookupFlags JDLookupFlags) { + ES.runSessionLocked([&]() { SearchOrder.push_back({&JD, JDLookupFlags}); }); +} + void JITDylib::replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD, - bool MatchNonExported) { + JITDylibLookupFlags JDLookupFlags) { ES.runSessionLocked([&]() { - auto I = std::find_if(SearchOrder.begin(), SearchOrder.end(), - [&](const JITDylibSearchList::value_type &KV) { - return KV.first == &OldJD; - }); - - if (I != SearchOrder.end()) - *I = {&NewJD, MatchNonExported}; + for (auto &KV : SearchOrder) + if (KV.first == &OldJD) { + KV = {&NewJD, JDLookupFlags}; + break; + } }); } void JITDylib::removeFromSearchOrder(JITDylib &JD) { ES.runSessionLocked([&]() { auto I = std::find_if(SearchOrder.begin(), SearchOrder.end(), - [&](const JITDylibSearchList::value_type &KV) { + [&](const JITDylibSearchOrder::value_type &KV) { return KV.first == &JD; }); if (I != SearchOrder.end()) @@ -1349,63 +1414,54 @@ Error JITDylib::remove(const SymbolNameSet &Names) { }); } -Expected JITDylib::lookupFlags(const SymbolNameSet &Names) { +Expected +JITDylib::lookupFlags(LookupKind K, JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet LookupSet) { return ES.runSessionLocked([&, this]() -> Expected { SymbolFlagsMap Result; - auto Unresolved = lookupFlagsImpl(Result, Names); - if (!Unresolved) - return Unresolved.takeError(); + lookupFlagsImpl(Result, K, JDLookupFlags, LookupSet); - /// Run any definition generators. + // Run any definition generators. for (auto &DG : DefGenerators) { - // Bail out early if we've resolved everything. - if (Unresolved->empty()) + // Bail out early if we found everything. + if (LookupSet.empty()) break; // Run this generator. 
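// A sketch (std::function-based, not the DefinitionGenerator interface) of
// the generator protocol used in the loop above: each generator sees the
// still-unresolved set and may add definitions; the caller re-runs the
// search after each one and stops as soon as nothing is left.
#include <functional>
#include <set>
#include <string>
#include <vector>

using SymbolSet = std::set<std::string>;
using Generator = std::function<bool(SymbolSet &)>; // false reports an error

bool runGenerators(SymbolSet &Unresolved,
                   const std::vector<Generator> &Generators,
                   const std::function<void(SymbolSet &)> &Search) {
  Search(Unresolved); // first pass over the definitions we already have
  for (const Generator &G : Generators) {
    if (Unresolved.empty())
      break; // bail out early if we found everything
    if (!G(Unresolved))
      return false; // generator failed: surface the error
    Search(Unresolved); // re-try with any newly added definitions
  }
  return true;
}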
- auto NewDefs = DG->tryToGenerate(*this, *Unresolved); - if (!NewDefs) - return NewDefs.takeError(); - - if (!NewDefs->empty()) { - auto Unresolved2 = lookupFlagsImpl(Result, *NewDefs); - if (!Unresolved2) - return Unresolved2.takeError(); - (void)Unresolved2; - assert(Unresolved2->empty() && - "All fallback defs should have been found by lookupFlagsImpl"); - } + if (auto Err = DG->tryToGenerate(K, *this, JDLookupFlags, LookupSet)) + return std::move(Err); - for (auto &Name : *NewDefs) - Unresolved->erase(Name); + // Re-try the search. + lookupFlagsImpl(Result, K, JDLookupFlags, LookupSet); } + return Result; }); } -Expected JITDylib::lookupFlagsImpl(SymbolFlagsMap &Flags, - const SymbolNameSet &Names) { - SymbolNameSet Unresolved; - - for (auto &Name : Names) { - auto I = Symbols.find(Name); - if (I != Symbols.end()) { - assert(!Flags.count(Name) && "Symbol already present in Flags map"); - Flags[Name] = I->second.getFlags(); - } else - Unresolved.insert(Name); - } +void JITDylib::lookupFlagsImpl(SymbolFlagsMap &Result, LookupKind K, + JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &LookupSet) { - return Unresolved; + LookupSet.forEachWithRemoval( + [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) -> bool { + auto I = Symbols.find(Name); + if (I == Symbols.end()) + return false; + assert(!Result.count(Name) && "Symbol already present in Flags map"); + Result[Name] = I->second.getFlags(); + return true; + }); } -Error JITDylib::lodgeQuery(std::shared_ptr &Q, - SymbolNameSet &Unresolved, bool MatchNonExported, - MaterializationUnitList &MUs) { +Error JITDylib::lodgeQuery(MaterializationUnitList &MUs, + std::shared_ptr &Q, + LookupKind K, JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &Unresolved) { assert(Q && "Query can not be null"); - if (auto Err = lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs)) + if (auto Err = lodgeQueryImpl(MUs, Q, K, JDLookupFlags, Unresolved)) return Err; // Run any definition generators. @@ -1416,104 +1472,86 @@ Error JITDylib::lodgeQuery(std::shared_ptr &Q, break; // Run the generator. - auto NewDefs = DG->tryToGenerate(*this, Unresolved); - - // If the generator returns an error then bail out. - if (!NewDefs) - return NewDefs.takeError(); - - // If the generator was able to generate new definitions for any of the - // unresolved symbols then lodge the query against them. - if (!NewDefs->empty()) { - for (auto &D : *NewDefs) - Unresolved.erase(D); - - // Lodge query. This can not fail as any new definitions were added - // by the generator under the session locked. Since they can't have - // started materializing yet the can not have failed. - cantFail(lodgeQueryImpl(Q, *NewDefs, MatchNonExported, MUs)); + if (auto Err = DG->tryToGenerate(K, *this, JDLookupFlags, Unresolved)) + return Err; - assert(NewDefs->empty() && - "All fallback defs should have been found by lookupImpl"); - } + // Lodge query. This can not fail as any new definitions were added + // by the generator under the session locked. Since they can't have + // started materializing yet they can not have failed. + cantFail(lodgeQueryImpl(MUs, Q, K, JDLookupFlags, Unresolved)); } return Error::success(); } -Error JITDylib::lodgeQueryImpl( - std::shared_ptr &Q, SymbolNameSet &Unresolved, - bool MatchNonExported, - std::vector> &MUs) { - - std::vector ToRemove; - for (auto Name : Unresolved) { - - // Search for the name in Symbols. Skip it if not found. 
- auto SymI = Symbols.find(Name); - if (SymI == Symbols.end()) - continue; - - // If this is a non exported symbol and we're skipping those then skip it. - if (!SymI->second.getFlags().isExported() && !MatchNonExported) - continue; - - // If we matched against Name in JD, mark it to be removed from the - // Unresolved set. - ToRemove.push_back(Name); - - // If we matched against this symbol but it is in the error state then - // bail out and treat it as a failure to materialize. - if (SymI->second.getFlags().hasError()) { - auto FailedSymbolsMap = std::make_shared(); - (*FailedSymbolsMap)[this] = {Name}; - return make_error(std::move(FailedSymbolsMap)); - } - - // If this symbol already meets the required state for then notify the - // query and continue. - if (SymI->second.getState() >= Q->getRequiredState()) { - Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol()); - continue; - } - - // Otherwise this symbol does not yet meet the required state. Check whether - // it has a materializer attached, and if so prepare to run it. - if (SymI->second.hasMaterializerAttached()) { - assert(SymI->second.getAddress() == 0 && - "Symbol not resolved but already has address?"); - auto UMII = UnmaterializedInfos.find(Name); - assert(UMII != UnmaterializedInfos.end() && - "Lazy symbol should have UnmaterializedInfo"); - auto MU = std::move(UMII->second->MU); - assert(MU != nullptr && "Materializer should not be null"); - - // Move all symbols associated with this MaterializationUnit into - // materializing state. - for (auto &KV : MU->getSymbols()) { - auto SymK = Symbols.find(KV.first); - SymK->second.setMaterializerAttached(false); - SymK->second.setState(SymbolState::Materializing); - UnmaterializedInfos.erase(KV.first); - } +Error JITDylib::lodgeQueryImpl(MaterializationUnitList &MUs, + std::shared_ptr &Q, + LookupKind K, JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &Unresolved) { + + return Unresolved.forEachWithRemoval( + [&](const SymbolStringPtr &Name, + SymbolLookupFlags SymLookupFlags) -> Expected { + // Search for name in symbols. If not found then continue without + // removal. + auto SymI = Symbols.find(Name); + if (SymI == Symbols.end()) + return false; + + // If this is a non exported symbol and we're matching exported symbols + // only then skip this symbol without removal. + if (!SymI->second.getFlags().isExported() && + JDLookupFlags == JITDylibLookupFlags::MatchExportedSymbolsOnly) + return false; + + // If we matched against this symbol but it is in the error state then + // bail out and treat it as a failure to materialize. + if (SymI->second.getFlags().hasError()) { + auto FailedSymbolsMap = std::make_shared(); + (*FailedSymbolsMap)[this] = {Name}; + return make_error(std::move(FailedSymbolsMap)); + } - // Add MU to the list of MaterializationUnits to be materialized. - MUs.push_back(std::move(MU)); - } + // If this symbol already meets the required state for then notify the + // query, then remove the symbol and continue. + if (SymI->second.getState() >= Q->getRequiredState()) { + Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol()); + return true; + } - // Add the query to the PendingQueries list. - assert(SymI->second.isInMaterializationPhase() && - "By this line the symbol should be materializing"); - auto &MI = MaterializingInfos[Name]; - MI.addQuery(Q); - Q->addQueryDependence(*this, Name); - } + // Otherwise this symbol does not yet meet the required state. Check + // whether it has a materializer attached, and if so prepare to run it. 
+ if (SymI->second.hasMaterializerAttached()) { + assert(SymI->second.getAddress() == 0 && + "Symbol not resolved but already has address?"); + auto UMII = UnmaterializedInfos.find(Name); + assert(UMII != UnmaterializedInfos.end() && + "Lazy symbol should have UnmaterializedInfo"); + auto MU = std::move(UMII->second->MU); + assert(MU != nullptr && "Materializer should not be null"); + + // Move all symbols associated with this MaterializationUnit into + // materializing state. + for (auto &KV : MU->getSymbols()) { + auto SymK = Symbols.find(KV.first); + SymK->second.setMaterializerAttached(false); + SymK->second.setState(SymbolState::Materializing); + UnmaterializedInfos.erase(KV.first); + } - // Remove any symbols that we found. - for (auto &Name : ToRemove) - Unresolved.erase(Name); + // Add MU to the list of MaterializationUnits to be materialized. + MUs.push_back(std::move(MU)); + } - return Error::success(); + // Add the query to the PendingQueries list and continue, deleting the + // element. + assert(SymI->second.isInMaterializationPhase() && + "By this line the symbol should be materializing"); + auto &MI = MaterializingInfos[Name]; + MI.addQuery(Q); + Q->addQueryDependence(*this, Name); + return true; + }); } Expected @@ -1526,7 +1564,7 @@ JITDylib::legacyLookup(std::shared_ptr Q, bool QueryComplete = false; std::vector> MUs; - SymbolNameSet Unresolved = std::move(Names); + SymbolLookupSet Unresolved(Names); auto Err = ES.runSessionLocked([&, this]() -> Error { QueryComplete = lookupImpl(Q, MUs, Unresolved); @@ -1538,16 +1576,13 @@ JITDylib::legacyLookup(std::shared_ptr Q, break; assert(!QueryComplete && "query complete but unresolved symbols remain?"); - auto NewDefs = DG->tryToGenerate(*this, Unresolved); - if (!NewDefs) - return NewDefs.takeError(); - if (!NewDefs->empty()) { - for (auto &D : *NewDefs) - Unresolved.erase(D); - QueryComplete = lookupImpl(Q, MUs, *NewDefs); - assert(NewDefs->empty() && - "All fallback defs should have been found by lookupImpl"); - } + if (auto Err = DG->tryToGenerate(LookupKind::Static, *this, + JITDylibLookupFlags::MatchAllSymbols, + Unresolved)) + return Err; + + if (!Unresolved.empty()) + QueryComplete = lookupImpl(Q, MUs, Unresolved); } return Error::success(); }); @@ -1575,68 +1610,68 @@ JITDylib::legacyLookup(std::shared_ptr Q, // for (auto &MU : MUs) // ES.dispatchMaterialization(*this, std::move(MU)); - return Unresolved; + SymbolNameSet RemainingSymbols; + for (auto &KV : Unresolved) + RemainingSymbols.insert(KV.first); + + return RemainingSymbols; } bool JITDylib::lookupImpl( std::shared_ptr &Q, std::vector> &MUs, - SymbolNameSet &Unresolved) { + SymbolLookupSet &Unresolved) { bool QueryComplete = false; std::vector ToRemove; - for (auto Name : Unresolved) { - - // Search for the name in Symbols. Skip it if not found. - auto SymI = Symbols.find(Name); - if (SymI == Symbols.end()) - continue; - - // If we found Name, mark it to be removed from the Unresolved set. - ToRemove.push_back(Name); - - if (SymI->second.getState() >= Q->getRequiredState()) { - Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol()); - if (Q->isComplete()) - QueryComplete = true; - continue; - } - - // If the symbol is lazy, get the MaterialiaztionUnit for it. 
- if (SymI->second.hasMaterializerAttached()) { - assert(SymI->second.getAddress() == 0 && - "Lazy symbol should not have a resolved address"); - auto UMII = UnmaterializedInfos.find(Name); - assert(UMII != UnmaterializedInfos.end() && - "Lazy symbol should have UnmaterializedInfo"); - auto MU = std::move(UMII->second->MU); - assert(MU != nullptr && "Materializer should not be null"); - - // Kick all symbols associated with this MaterializationUnit into - // materializing state. - for (auto &KV : MU->getSymbols()) { - auto SymK = Symbols.find(KV.first); - assert(SymK != Symbols.end() && "Missing symbol table entry"); - SymK->second.setState(SymbolState::Materializing); - SymK->second.setMaterializerAttached(false); - UnmaterializedInfos.erase(KV.first); - } + Unresolved.forEachWithRemoval( + [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) -> bool { + // Search for the name in Symbols. Skip without removing if not found. + auto SymI = Symbols.find(Name); + if (SymI == Symbols.end()) + return false; + + // If the symbol is already in the required state then notify the query + // and remove. + if (SymI->second.getState() >= Q->getRequiredState()) { + Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol()); + if (Q->isComplete()) + QueryComplete = true; + return true; + } - // Add MU to the list of MaterializationUnits to be materialized. - MUs.push_back(std::move(MU)); - } + // If the symbol is lazy, get the MaterialiaztionUnit for it. + if (SymI->second.hasMaterializerAttached()) { + assert(SymI->second.getAddress() == 0 && + "Lazy symbol should not have a resolved address"); + auto UMII = UnmaterializedInfos.find(Name); + assert(UMII != UnmaterializedInfos.end() && + "Lazy symbol should have UnmaterializedInfo"); + auto MU = std::move(UMII->second->MU); + assert(MU != nullptr && "Materializer should not be null"); + + // Kick all symbols associated with this MaterializationUnit into + // materializing state. + for (auto &KV : MU->getSymbols()) { + auto SymK = Symbols.find(KV.first); + assert(SymK != Symbols.end() && "Missing symbol table entry"); + SymK->second.setState(SymbolState::Materializing); + SymK->second.setMaterializerAttached(false); + UnmaterializedInfos.erase(KV.first); + } - // Add the query to the PendingQueries list. - assert(SymI->second.isInMaterializationPhase() && - "By this line the symbol should be materializing"); - auto &MI = MaterializingInfos[Name]; - MI.addQuery(Q); - Q->addQueryDependence(*this, Name); - } + // Add MU to the list of MaterializationUnits to be materialized. + MUs.push_back(std::move(MU)); + } - // Remove any marked symbols from the Unresolved set. - for (auto &Name : ToRemove) - Unresolved.erase(Name); + // Add the query to the PendingQueries list. + assert(SymI->second.isInMaterializationPhase() && + "By this line the symbol should be materializing"); + auto &MI = MaterializingInfos[Name]; + MI.addQuery(Q); + Q->addQueryDependence(*this, Name); + return true; + }); return QueryComplete; } @@ -1645,11 +1680,7 @@ void JITDylib::dump(raw_ostream &OS) { ES.runSessionLocked([&, this]() { OS << "JITDylib \"" << JITDylibName << "\" (ES: " << format("0x%016" PRIx64, reinterpret_cast(&ES)) << "):\n" - << "Search order: ["; - for (auto &KV : SearchOrder) - OS << " (\"" << KV.first->getName() << "\", " - << (KV.second ? 
"all" : "exported only") << ")"; - OS << " ]\n" + << "Search order: " << SearchOrder << "\n" << "Symbol table:\n"; for (auto &KV : Symbols) { @@ -1730,7 +1761,7 @@ JITDylib::MaterializingInfo::takeQueriesMeeting(SymbolState RequiredState) { JITDylib::JITDylib(ExecutionSession &ES, std::string Name) : ES(ES), JITDylibName(std::move(Name)) { - SearchOrder.push_back({this, true}); + SearchOrder.push_back({this, JITDylibLookupFlags::MatchAllSymbols}); } Error JITDylib::defineImpl(MaterializationUnit &MU) { @@ -1823,12 +1854,6 @@ void JITDylib::transferEmittedNodeDependencies( ExecutionSession::ExecutionSession(std::shared_ptr SSP) : SSP(SSP ? std::move(SSP) : std::make_shared()) { - // Construct the main dylib. - JDs.push_back(std::unique_ptr(new JITDylib(*this, "
"))); -} - -JITDylib &ExecutionSession::getMainJITDylib() { - return runSessionLocked([this]() -> JITDylib & { return *JDs.front(); }); } JITDylib *ExecutionSession::getJITDylibByName(StringRef Name) { @@ -1840,14 +1865,11 @@ JITDylib *ExecutionSession::getJITDylibByName(StringRef Name) { }); } -JITDylib &ExecutionSession::createJITDylib(std::string Name, - bool AddToMainDylibSearchOrder) { +JITDylib &ExecutionSession::createJITDylib(std::string Name) { assert(!getJITDylibByName(Name) && "JITDylib with that name already exists"); return runSessionLocked([&, this]() -> JITDylib & { JDs.push_back( std::unique_ptr(new JITDylib(*this, std::move(Name)))); - if (AddToMainDylibSearchOrder) - JDs.front()->addToSearchOrder(*JDs.back()); return *JDs.back(); }); } @@ -1898,7 +1920,7 @@ Expected ExecutionSession::legacyLookup( #endif auto Query = std::make_shared( - Names, RequiredState, std::move(NotifyComplete)); + SymbolLookupSet(Names), RequiredState, std::move(NotifyComplete)); // FIXME: This should be run session locked along with the registration code // and error reporting below. SymbolNameSet UnresolvedSymbols = AsyncLookup(Query, std::move(Names)); @@ -1935,8 +1957,9 @@ Expected ExecutionSession::legacyLookup( } void ExecutionSession::lookup( - const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols, - SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete, + LookupKind K, const JITDylibSearchOrder &SearchOrder, + SymbolLookupSet Symbols, SymbolState RequiredState, + SymbolsResolvedCallback NotifyComplete, RegisterDependenciesFunction RegisterDependencies) { LLVM_DEBUG({ @@ -1965,14 +1988,24 @@ void ExecutionSession::lookup( "JITDylibList should not contain duplicate entries"); auto &JD = *KV.first; - auto MatchNonExported = KV.second; - if (auto Err = JD.lodgeQuery(Q, Unresolved, MatchNonExported, - CollectedMUsMap[&JD])) + auto JDLookupFlags = KV.second; + if (auto Err = JD.lodgeQuery(CollectedMUsMap[&JD], Q, K, JDLookupFlags, + Unresolved)) return Err; } + // Strip any weakly referenced symbols that were not found. + Unresolved.forEachWithRemoval( + [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) { + if (Flags == SymbolLookupFlags::WeaklyReferencedSymbol) { + Q->dropSymbol(Name); + return true; + } + return false; + }); + if (!Unresolved.empty()) - return make_error(std::move(Unresolved)); + return make_error(Unresolved.getSymbolNames()); return Error::success(); }; @@ -2026,8 +2059,8 @@ void ExecutionSession::lookup( } Expected -ExecutionSession::lookup(const JITDylibSearchList &SearchOrder, - const SymbolNameSet &Symbols, +ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder, + const SymbolLookupSet &Symbols, LookupKind K, SymbolState RequiredState, RegisterDependenciesFunction RegisterDependencies) { #if LLVM_ENABLE_THREADS @@ -2059,7 +2092,7 @@ ExecutionSession::lookup(const JITDylibSearchList &SearchOrder, #endif // Perform the asynchronous lookup. 
- lookup(SearchOrder, Symbols, RequiredState, NotifyComplete, + lookup(K, SearchOrder, Symbols, RequiredState, NotifyComplete, RegisterDependencies); #if LLVM_ENABLE_THREADS @@ -2080,12 +2113,12 @@ ExecutionSession::lookup(const JITDylibSearchList &SearchOrder, } Expected -ExecutionSession::lookup(const JITDylibSearchList &SearchOrder, +ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder, SymbolStringPtr Name) { - SymbolNameSet Names({Name}); + SymbolLookupSet Names({Name}); - if (auto ResultMap = lookup(SearchOrder, std::move(Names), SymbolState::Ready, - NoDependenciesToRegister)) { + if (auto ResultMap = lookup(SearchOrder, std::move(Names), LookupKind::Static, + SymbolState::Ready, NoDependenciesToRegister)) { assert(ResultMap->size() == 1 && "Unexpected number of results"); assert(ResultMap->count(Name) && "Missing result for symbol"); return std::move(ResultMap->begin()->second); @@ -2096,14 +2129,7 @@ ExecutionSession::lookup(const JITDylibSearchList &SearchOrder, Expected ExecutionSession::lookup(ArrayRef SearchOrder, SymbolStringPtr Name) { - SymbolNameSet Names({Name}); - - JITDylibSearchList FullSearchOrder; - FullSearchOrder.reserve(SearchOrder.size()); - for (auto *JD : SearchOrder) - FullSearchOrder.push_back({JD, false}); - - return lookup(FullSearchOrder, Name); + return lookup(makeJITDylibSearchOrder(SearchOrder), Name); } Expected diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp index 4a886ac0597c1..0a3fef207ac2f 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -19,6 +19,32 @@ namespace llvm { namespace orc { +int runAsMain(int (*Main)(int, char *[]), ArrayRef Args, + Optional ProgramName) { + std::vector> ArgVStorage; + std::vector ArgV; + + ArgVStorage.reserve(Args.size() + (ProgramName ? 1 : 0)); + ArgV.reserve(Args.size() + 1 + (ProgramName ? 1 : 0)); + + if (ProgramName) { + ArgVStorage.push_back(std::make_unique(ProgramName->size() + 1)); + llvm::copy(*ProgramName, &ArgVStorage.back()[0]); + ArgVStorage.back()[ProgramName->size()] = '\0'; + ArgV.push_back(ArgVStorage.back().get()); + } + + for (auto &Arg : Args) { + ArgVStorage.push_back(std::make_unique(Arg.size() + 1)); + llvm::copy(Arg, &ArgVStorage.back()[0]); + ArgVStorage.back()[Arg.size()] = '\0'; + ArgV.push_back(ArgVStorage.back().get()); + } + ArgV.push_back(nullptr); + + return Main(Args.size(), ArgV.data()); +} + CtorDtorIterator::CtorDtorIterator(const GlobalVariable *GV, bool End) : InitList( GV ? 
dyn_cast_or_null(GV->getInitializer()) : nullptr), @@ -118,19 +144,17 @@ void CtorDtorRunner::add(iterator_range CtorDtors) { Error CtorDtorRunner::run() { using CtorDtorTy = void (*)(); - SymbolNameSet Names; - - for (auto &KV : CtorDtorsByPriority) { - for (auto &Name : KV.second) { - auto Added = Names.insert(Name).second; - (void)Added; - assert(Added && "Ctor/Dtor names clashed"); - } - } + SymbolLookupSet LookupSet; + for (auto &KV : CtorDtorsByPriority) + for (auto &Name : KV.second) + LookupSet.add(Name); + assert(!LookupSet.containsDuplicates() && + "Ctor/Dtor list contains duplicates"); auto &ES = JD.getExecutionSession(); - if (auto CtorDtorMap = - ES.lookup(JITDylibSearchList({{&JD, true}}), std::move(Names))) { + if (auto CtorDtorMap = ES.lookup( + makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols), + std::move(LookupSet))) { for (auto &KV : CtorDtorsByPriority) { for (auto &Name : KV.second) { assert(CtorDtorMap->count(Name) && "No entry for Name"); @@ -190,15 +214,16 @@ DynamicLibrarySearchGenerator::Load(const char *FileName, char GlobalPrefix, std::move(Lib), GlobalPrefix, std::move(Allow)); } -Expected -DynamicLibrarySearchGenerator::tryToGenerate(JITDylib &JD, - const SymbolNameSet &Names) { - orc::SymbolNameSet Added; +Error DynamicLibrarySearchGenerator::tryToGenerate( + LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) { orc::SymbolMap NewSymbols; bool HasGlobalPrefix = (GlobalPrefix != '\0'); - for (auto &Name : Names) { + for (auto &KV : Symbols) { + auto &Name = KV.first; + if ((*Name).empty()) continue; @@ -211,20 +236,16 @@ DynamicLibrarySearchGenerator::tryToGenerate(JITDylib &JD, std::string Tmp((*Name).data() + HasGlobalPrefix, (*Name).size() - HasGlobalPrefix); if (void *Addr = Dylib.getAddressOfSymbol(Tmp.c_str())) { - Added.insert(Name); NewSymbols[Name] = JITEvaluatedSymbol( static_cast(reinterpret_cast(Addr)), JITSymbolFlags::Exported); } } - // Add any new symbols to JD. Since the generator is only called for symbols - // that are not already defined, this will never trigger a duplicate - // definition error, so we can wrap this call in a 'cantFail'. - if (!NewSymbols.empty()) - cantFail(JD.define(absoluteSymbols(std::move(NewSymbols)))); + if (NewSymbols.empty()) + return Error::success(); - return Added; + return JD.define(absoluteSymbols(std::move(NewSymbols))); } Expected> @@ -251,15 +272,24 @@ StaticLibraryDefinitionGenerator::Create( return std::move(ADG); } -Expected -StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD, - const SymbolNameSet &Names) { +Error StaticLibraryDefinitionGenerator::tryToGenerate( + LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) { + + // Don't materialize symbols from static archives unless this is a static + // lookup. + if (K != LookupKind::Static) + return Error::success(); + + // Bail out early if we've already freed the archive. 
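// ---- Editor's aside (illustrative sketch, not part of the patch) ----
// Definition generators now return Error and install any definitions they
// can supply directly on the JITDylib, rather than returning the set of
// newly defined names. A skeleton generator under the new interface might
// look like the following (the class name is hypothetical):
class NullGenerator : public JITDylib::DefinitionGenerator {
public:
  Error tryToGenerate(LookupKind K, JITDylib &JD,
                      JITDylibLookupFlags JDLookupFlags,
                      const SymbolLookupSet &Symbols) override {
    // A real generator would inspect Symbols and call JD.define(...) for
    // whatever it can provide; leaving Symbols untouched simply means
    // "nothing found here", as DynamicLibrarySearchGenerator does above when
    // the dylib lacks the requested names.
    return Error::success();
  }
};
// ----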
+ if (!Archive) + return Error::success(); DenseSet> ChildBufferInfos; - SymbolNameSet NewDefs; - for (const auto &Name : Names) { - auto Child = Archive.findSym(*Name); + for (const auto &KV : Symbols) { + const auto &Name = KV.first; + auto Child = Archive->findSym(*Name); if (!Child) return Child.takeError(); if (*Child == None) @@ -269,7 +299,6 @@ StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD, return ChildBuffer.takeError(); ChildBufferInfos.insert( {ChildBuffer->getBuffer(), ChildBuffer->getBufferIdentifier()}); - NewDefs.insert(Name); } for (auto ChildBufferInfo : ChildBufferInfos) { @@ -278,31 +307,16 @@ StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD, if (auto Err = L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef), VModuleKey())) - return std::move(Err); - - --UnrealizedObjects; + return Err; } - return NewDefs; + return Error::success(); } StaticLibraryDefinitionGenerator::StaticLibraryDefinitionGenerator( ObjectLayer &L, std::unique_ptr ArchiveBuffer, Error &Err) : L(L), ArchiveBuffer(std::move(ArchiveBuffer)), - Archive(*this->ArchiveBuffer, Err) { - - if (Err) - return; - - Error Err2 = Error::success(); - for (auto _ : Archive.children(Err2)) { - (void)_; - ++UnrealizedObjects; - } - - // No need to check this: We will leave it to the caller. - Err = std::move(Err2); -} + Archive(std::make_unique(*this->ArchiveBuffer, Err)) {} } // End namespace orc. } // End namespace llvm. diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 0295db7633dd0..440935ffe9fb9 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -101,7 +101,10 @@ JITTargetAddress JITCompileCallbackManager::executeCompileCallback( Name = I->second; } - if (auto Sym = ES.lookup(JITDylibSearchList({{&CallbacksJD, true}}), Name)) + if (auto Sym = + ES.lookup(makeJITDylibSearchOrder( + &CallbacksJD, JITDylibLookupFlags::MatchAllSymbols), + Name)) return Sym->getAddress(); else { llvm::dbgs() << "Didn't find callback.\n"; diff --git a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp index 1d3e6db913e21..114e81e41771b 100644 --- a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp +++ b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp @@ -28,14 +28,12 @@ Expected JITTargetMachineBuilder::detectHost() { // Retrieve host CPU name and sub-target features and add them to builder. // Relocation model, code model and codegen opt level are kept to default // values. 
-  llvm::SubtargetFeatures SubtargetFeatures;
   llvm::StringMap<bool> FeatureMap;
   llvm::sys::getHostCPUFeatures(FeatureMap);
   for (auto &Feature : FeatureMap)
-    SubtargetFeatures.AddFeature(Feature.first(), Feature.second);
+    TMBuilder.getFeatures().AddFeature(Feature.first(), Feature.second);

   TMBuilder.setCPU(llvm::sys::getHostCPUName());
-  TMBuilder.addFeatures(SubtargetFeatures.getFeatures());

   return TMBuilder;
 }
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 03f22e0c2a2a9..89dad6d61b42d 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -56,7 +56,9 @@ Error LLJIT::addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj) {

 Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
                                                         StringRef Name) {
-  return ES->lookup(JITDylibSearchList({{&JD, true}}), ES->intern(Name));
+  return ES->lookup(
+      makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols),
+      ES->intern(Name));
 }

 std::unique_ptr<ObjectLayer>
@@ -103,7 +105,7 @@ LLJIT::createCompileFunction(LLJITBuilderState &S,

 LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
     : ES(S.ES ? std::move(S.ES) : std::make_unique<ExecutionSession>()),
-      Main(this->ES->getMainJITDylib()), DL(""),
+      Main(this->ES->createJITDylib("<main>
")), DL(""), ObjLinkingLayer(createObjectLinkingLayer(S, *ES)), ObjTransformLayer(*this->ES, *ObjLinkingLayer), CtorRunner(Main), DtorRunner(Main) { diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp index 93aabd817d601..aab490feb8ea2 100644 --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -50,8 +50,10 @@ LazyCallThroughManager::callThroughToSymbol(JITTargetAddress TrampolineAddr) { SourceJD = I->second.first; SymbolName = I->second.second; } - auto LookupResult = - ES.lookup(JITDylibSearchList({{SourceJD, true}}), SymbolName); + + auto LookupResult = ES.lookup( + makeJITDylibSearchOrder(SourceJD, JITDylibLookupFlags::MatchAllSymbols), + SymbolName); if (!LookupResult) { ES.reportError(LookupResult.takeError()); diff --git a/llvm/lib/ExecutionEngine/Orc/Legacy.cpp b/llvm/lib/ExecutionEngine/Orc/Legacy.cpp index 9f9a6730b2c30..67b804c37287d 100644 --- a/llvm/lib/ExecutionEngine/Orc/Legacy.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Legacy.cpp @@ -37,7 +37,8 @@ void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols, }; auto Q = std::make_shared( - InternedSymbols, SymbolState::Resolved, std::move(OnResolvedWithUnwrap)); + SymbolLookupSet(InternedSymbols), SymbolState::Resolved, + std::move(OnResolvedWithUnwrap)); auto Unresolved = R.lookup(Q, InternedSymbols); if (Unresolved.empty()) { diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 874decb2ade0b..be0ce4a1d75a0 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -47,18 +47,28 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { MR.failMaterialization(); } - void lookup(const DenseSet &Symbols, + void lookup(const LookupMap &Symbols, std::unique_ptr LC) override { - JITDylibSearchList SearchOrder; + JITDylibSearchOrder SearchOrder; MR.getTargetJITDylib().withSearchOrderDo( - [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; }); + [&](const JITDylibSearchOrder &O) { SearchOrder = O; }); auto &ES = Layer.getExecutionSession(); - SymbolNameSet InternedSymbols; - for (auto &S : Symbols) - InternedSymbols.insert(ES.intern(S)); + SymbolLookupSet LookupSet; + for (auto &KV : Symbols) { + orc::SymbolLookupFlags LookupFlags; + switch (KV.second) { + case jitlink::SymbolLookupFlags::RequiredSymbol: + LookupFlags = orc::SymbolLookupFlags::RequiredSymbol; + break; + case jitlink::SymbolLookupFlags::WeaklyReferencedSymbol: + LookupFlags = orc::SymbolLookupFlags::WeaklyReferencedSymbol; + break; + } + LookupSet.add(ES.intern(KV.first), LookupFlags); + } // OnResolve -- De-intern the symbols and pass the result to the linker. 
auto OnResolve = [this, LookupContinuation = std::move(LC)]( @@ -74,8 +84,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { } }; - ES.lookup(SearchOrder, std::move(InternedSymbols), SymbolState::Resolved, - std::move(OnResolve), [this](const SymbolDependenceMap &Deps) { + ES.lookup(LookupKind::Static, SearchOrder, std::move(LookupSet), + SymbolState::Resolved, std::move(OnResolve), + [this](const SymbolDependenceMap &Deps) { registerDependencies(Deps); }); } diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index 939cd539d1fb0..3344bd4d53f98 100644 --- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -19,11 +19,11 @@ class JITDylibSearchOrderResolver : public JITSymbolResolver { void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) { auto &ES = MR.getTargetJITDylib().getExecutionSession(); - SymbolNameSet InternedSymbols; + SymbolLookupSet InternedSymbols; // Intern the requested symbols: lookup takes interned strings. for (auto &S : Symbols) - InternedSymbols.insert(ES.intern(S)); + InternedSymbols.add(ES.intern(S)); // Build an OnResolve callback to unwrap the interned strings and pass them // to the OnResolved callback. @@ -46,11 +46,12 @@ class JITDylibSearchOrderResolver : public JITSymbolResolver { MR.addDependenciesForAll(Deps); }; - JITDylibSearchList SearchOrder; + JITDylibSearchOrder SearchOrder; MR.getTargetJITDylib().withSearchOrderDo( - [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; }); - ES.lookup(SearchOrder, InternedSymbols, SymbolState::Resolved, - std::move(OnResolvedWithUnwrap), RegisterDependencies); + [&](const JITDylibSearchOrder &JDs) { SearchOrder = JDs; }); + ES.lookup(LookupKind::Static, SearchOrder, InternedSymbols, + SymbolState::Resolved, std::move(OnResolvedWithUnwrap), + RegisterDependencies); } Expected getResponsibilitySet(const LookupSet &Symbols) { diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 01989b97f7fa0..f9d4b181f862b 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -3400,9 +3400,6 @@ void AssemblyWriter::printTypeIdentities() { /// printFunction - Print all aspects of a function. void AssemblyWriter::printFunction(const Function *F) { - // Print out the return type and name. 
- Out << '\n'; - if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out); if (F->isMaterializable()) diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d2dd2a69beab2..5aaf90df6f6e3 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -559,6 +559,26 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); return true; } + if (Name.startswith("arm.neon.vqadds.")) { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat, + F->arg_begin()->getType()); + return true; + } + if (Name.startswith("arm.neon.vqaddu.")) { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat, + F->arg_begin()->getType()); + return true; + } + if (Name.startswith("arm.neon.vqsubs.")) { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat, + F->arg_begin()->getType()); + return true; + } + if (Name.startswith("arm.neon.vqsubu.")) { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat, + F->arg_begin()->getType()); + return true; + } if (Name.startswith("aarch64.neon.addp")) { if (F->arg_size() != 2) break; // Invalid IR. diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index 68c3c7ad90dab..2a8ea0657dbb6 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -64,11 +64,11 @@ ConstantRange ConstantRange::fromKnownBits(const KnownBits &Known, // For unsigned ranges, or signed ranges with known sign bit, create a simple // range between the smallest and largest possible value. if (!IsSigned || Known.isNegative() || Known.isNonNegative()) - return ConstantRange(Known.One, ~Known.Zero + 1); + return ConstantRange(Known.getMinValue(), Known.getMaxValue() + 1); // If we don't know the sign bit, pick the lower bound as a negative number // and the upper bound as a non-negative one. 
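// ---- Editor's aside (worked example, not part of the patch) ----
// A concrete instance of the rewritten fromKnownBits logic, using the
// KnownBits helpers it now relies on (getMinValue() == One, getMaxValue()
// == ~Zero). For 4-bit values with bit 0 known one and bit 2 known zero:
KnownBits Known(4);
Known.One = APInt(4, 0x1);  // bit 0 is known one
Known.Zero = APInt(4, 0x4); // bit 2 is known zero
ConstantRange CR = ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
// getMinValue() == 1 and getMaxValue() == 11, so CR is [1, 12): every value
// consistent with the known bits lies inside the range.
// ----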
- APInt Lower = Known.One, Upper = ~Known.Zero; + APInt Lower = Known.getMinValue(), Upper = Known.getMaxValue(); Lower.setSignBit(); Upper.clearSignBit(); return ConstantRange(Lower, Upper + 1); diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index 59b4a5ef8a186..bdd9f6baf3791 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -306,10 +306,11 @@ DIDerivedType *DIBuilder::createReferenceType( DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name, DIFile *File, unsigned LineNo, - DIScope *Context) { + DIScope *Context, + uint32_t AlignInBits) { return DIDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File, - LineNo, getNonCompileUnitScope(Context), Ty, 0, 0, - 0, None, DINode::FlagZero); + LineNo, getNonCompileUnitScope(Context), Ty, 0, + AlignInBits, 0, None, DINode::FlagZero); } DIDerivedType *DIBuilder::createFriend(DIType *Ty, DIType *FriendTy) { diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 1bbe6b85d2600..62bfeb5c5d77a 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -1108,11 +1108,10 @@ LLVMMetadataRef LLVMDIBuilderCreateTypedef(LLVMDIBuilderRef Builder, LLVMMetadataRef Type, const char *Name, size_t NameLen, LLVMMetadataRef File, unsigned LineNo, - LLVMMetadataRef Scope) { + LLVMMetadataRef Scope, uint32_t AlignInBits) { return wrap(unwrap(Builder)->createTypedef( - unwrapDI(Type), {Name, NameLen}, - unwrapDI(File), LineNo, - unwrapDI(Scope))); + unwrapDI(Type), {Name, NameLen}, unwrapDI(File), LineNo, + unwrapDI(Scope), AlignInBits)); } LLVMMetadataRef diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index e4036ee1eb0c9..9b42a5a0e1b5b 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -23,6 +23,9 @@ using namespace llvm; +const DIExpression::FragmentInfo DebugVariable::DefaultFragment = { + std::numeric_limits::max(), std::numeric_limits::min()}; + DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line, unsigned Column, ArrayRef MDs, bool ImplicitCode) @@ -1148,10 +1151,14 @@ Optional DIExpression::createFragmentExpression( for (auto Op : Expr->expr_ops()) { switch (Op.getOp()) { default: break; + case dwarf::DW_OP_shr: + case dwarf::DW_OP_shra: + case dwarf::DW_OP_shl: case dwarf::DW_OP_plus: + case dwarf::DW_OP_plus_uconst: case dwarf::DW_OP_minus: - // We can't safely split arithmetic into multiple fragments because we - // can't express carry-over between fragments. + // We can't safely split arithmetic or shift operations into multiple + // fragments because we can't express carry-over between fragments. // // FIXME: We *could* preserve the lowest fragment of a constant offset // operation if the offset fits into SizeInBits. 
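// ---- Editor's aside (illustrative sketch, not part of the patch) ----
// The C API change above is source-breaking for existing callers, which must
// now pass an alignment; 0 preserves the previous behaviour. Assuming
// Builder, IntTy, File and Scope were created earlier:
LLVMMetadataRef MyIntTypedef = LLVMDIBuilderCreateTypedef(
    Builder, IntTy, "my_int", /*NameLen=*/6, File, /*LineNo=*/42, Scope,
    /*AlignInBits=*/0);
// ----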
diff --git a/llvm/lib/IR/IRPrintingPasses.cpp b/llvm/lib/IR/IRPrintingPasses.cpp index 8fa97a3aecb73..03657ff8d9d43 100644 --- a/llvm/lib/IR/IRPrintingPasses.cpp +++ b/llvm/lib/IR/IRPrintingPasses.cpp @@ -57,7 +57,7 @@ PreservedAnalyses PrintFunctionPass::run(Function &F, if (forcePrintModuleIR()) OS << Banner << " (function: " << F.getName() << ")\n" << *F.getParent(); else - OS << Banner << static_cast(F); + OS << Banner << '\n' << static_cast(F); } return PreservedAnalyses::all(); } diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 8fe59912f20ac..90239bb762989 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -1776,58 +1776,42 @@ LLVM_DUMP_METHOD void PMStack::dump() const { void ModulePass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { // Find Module Pass Manager - while (!PMS.empty()) { - PassManagerType TopPMType = PMS.top()->getPassManagerType(); - if (TopPMType == PreferredType) - break; // We found desired pass manager - else if (TopPMType > PMT_ModulePassManager) - PMS.pop(); // Pop children pass managers - else - break; - } - assert(!PMS.empty() && "Unable to find appropriate Pass Manager"); + PassManagerType T; + while ((T = PMS.top()->getPassManagerType()) > PMT_ModulePassManager && + T != PreferredType) + PMS.pop(); PMS.top()->add(this); } /// Find appropriate Function Pass Manager or Call Graph Pass Manager /// in the PM Stack and add self into that manager. void FunctionPass::assignPassManager(PMStack &PMS, - PassManagerType PreferredType) { - + PassManagerType /*PreferredType*/) { // Find Function Pass Manager - while (!PMS.empty()) { - if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager) - PMS.pop(); - else - break; - } + PMDataManager *PM; + while (PM = PMS.top(), PM->getPassManagerType() > PMT_FunctionPassManager) + PMS.pop(); // Create new Function Pass Manager if needed. - FPPassManager *FPP; - if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) { - FPP = (FPPassManager *)PMS.top(); - } else { - assert(!PMS.empty() && "Unable to create Function Pass Manager"); - PMDataManager *PMD = PMS.top(); - + if (PM->getPassManagerType() != PMT_FunctionPassManager) { // [1] Create new Function Pass Manager - FPP = new FPPassManager(); + auto *FPP = new FPPassManager; FPP->populateInheritedAnalysis(PMS); // [2] Set up new manager's top level manager - PMTopLevelManager *TPM = PMD->getTopLevelManager(); - TPM->addIndirectPassManager(FPP); + PM->getTopLevelManager()->addIndirectPassManager(FPP); // [3] Assign manager to manage this new manager. This may create // and push new managers into PMS - FPP->assignPassManager(PMS, PMD->getPassManagerType()); + FPP->assignPassManager(PMS, PM->getPassManagerType()); // [4] Push new manager into PMS PMS.push(FPP); + PM = FPP; } // Assign FPP as the manager of this pass. - FPP->add(this); + PM->add(this); } PassManagerBase::~PassManagerBase() {} diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index ebe22c37c707b..0d48090e4268b 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1381,8 +1381,12 @@ lto::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses, StringRef RemarksFormat, bool RemarksWithHotness, int Count) { std::string Filename = RemarksFilename; + // For ThinLTO, file.opt. becomes + // file.opt..thin... if (!Filename.empty() && Count != -1) - Filename += ".thin." + llvm::utostr(Count) + ".yaml"; + Filename = + (Twine(Filename) + ".thin." 
+ llvm::utostr(Count) + "." + RemarksFormat) + .str(); auto ResultOrErr = llvm::setupOptimizationRemarks( Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness); diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index bcc7c45afc01b..b4b3c9956cc2d 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -1701,7 +1701,8 @@ void FrameEmitterImpl::EmitFDE(const MCSymbol &cieStart, MakeStartMinusEndExpr(Streamer, SectionStart, cieStart, 0); emitAbsValue(Streamer, offset, 4); } else { - Streamer.EmitSymbolValue(&cieStart, 4); + Streamer.EmitSymbolValue(&cieStart, 4, + asmInfo->needsDwarfSectionOffsetDirective()); } // PC Begin diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index 5b4da1998c414..9aee0a5ca4e5e 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -463,6 +463,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { DebugSecType, ELF::SHF_EXCLUDE); DwarfRnglistsDWOSection = Ctx->getELFSection(".debug_rnglists.dwo", DebugSecType, ELF::SHF_EXCLUDE); + DwarfMacinfoDWOSection = + Ctx->getELFSection(".debug_macinfo.dwo", DebugSecType, ELF::SHF_EXCLUDE); DwarfLoclistsDWOSection = Ctx->getELFSection(".debug_loclists.dwo", DebugSecType, ELF::SHF_EXCLUDE); diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp index f646168d3a4a7..8377e295532ae 100644 --- a/llvm/lib/MC/MCSectionXCOFF.cpp +++ b/llvm/lib/MC/MCSectionXCOFF.cpp @@ -40,6 +40,8 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, case XCOFF::XMC_DS: OS << "\t.csect " << QualName->getName() << '\n'; break; + case XCOFF::XMC_TC: + break; case XCOFF::XMC_TC0: OS << "\t.toc\n"; break; diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp index c40a067e93e17..6efa167ced42a 100644 --- a/llvm/lib/MC/MCXCOFFStreamer.cpp +++ b/llvm/lib/MC/MCXCOFFStreamer.cpp @@ -50,12 +50,6 @@ void MCXCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, XCOFF::C_HIDEXT); Symbol->setCommon(Size, ByteAlignment); - // Need to add this symbol to the current Fragment which will belong to the - // containing CSECT. - auto *F = dyn_cast_or_null(getCurrentFragment()); - assert(F && "Expected a valid section with a fragment set."); - Symbol->setFragment(F); - // Emit the alignment and storage for the variable to the section. EmitValueToAlignment(ByteAlignment); EmitZeros(Size); diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index ca96a0ecf9ff5..773ca3a0909de 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -275,6 +275,12 @@ CsectGroup &XCOFFObjectWriter::getCsectGroup(const MCSectionXCOFF *MCSec) { "We should have only one TOC-base, and it should be the first csect " "in this CsectGroup."); return TOCCsects; + case XCOFF::XMC_TC: + assert(XCOFF::XTY_SD == MCSec->getCSectType() && + "Only an initialized csect can contain TC entry."); + assert(!TOCCsects.empty() && + "We should at least have a TOC-base in this CsectGroup."); + return TOCCsects; default: report_fatal_error("Unhandled mapping of csect to section."); } @@ -574,7 +580,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) { // yet, so start at index 0. uint32_t SymbolTableIndex = 0; - // Calculate undefined symbol's indices. + // Calculate indices for undefined symbols. 
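// ---- Editor's aside (worked example, not part of the patch) ----
// The LTO.cpp change earlier in this patch makes per-task ThinLTO remark
// files carry the serialization format as their extension instead of a
// hard-coded ".yaml":
std::string Filename = "out.opt";
StringRef RemarksFormat = "bitstream";
int Count = 3;
Filename = (Twine(Filename) + ".thin." + llvm::utostr(Count) + "." +
            RemarksFormat)
               .str();
// Filename == "out.opt.thin.3.bitstream"
// ----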
for (auto &Csect : UndefinedCsects) { Csect.Size = 0; Csect.Address = 0; diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index e8b54a7e60200..d2e6fdfea009a 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -248,7 +248,7 @@ ELFState::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH) ImplicitSections.push_back(".symtab"); ImplicitSections.insert(ImplicitSections.end(), {".strtab", ".shstrtab"}); - if (!Doc.DynamicSymbols.empty()) + if (Doc.DynamicSymbols) ImplicitSections.insert(ImplicitSections.end(), {".dynsym", ".dynstr"}); // Insert placeholders for implicit sections that are not @@ -562,21 +562,24 @@ void ELFState::initSymtabSectionHeader(Elf_Shdr &SHeader, ArrayRef Symbols; if (IsStatic && Doc.Symbols) Symbols = *Doc.Symbols; - else if (!IsStatic) - Symbols = Doc.DynamicSymbols; + else if (!IsStatic && Doc.DynamicSymbols) + Symbols = *Doc.DynamicSymbols; ELFYAML::RawContentSection *RawSec = dyn_cast_or_null(YAMLSec); - if (RawSec && !Symbols.empty() && (RawSec->Content || RawSec->Size)) { - if (RawSec->Content) - reportError("cannot specify both `Content` and " + - (IsStatic ? Twine("`Symbols`") : Twine("`DynamicSymbols`")) + - " for symbol table section '" + RawSec->Name + "'"); - if (RawSec->Size) - reportError("cannot specify both `Size` and " + - (IsStatic ? Twine("`Symbols`") : Twine("`DynamicSymbols`")) + - " for symbol table section '" + RawSec->Name + "'"); - return; + if (RawSec && (RawSec->Content || RawSec->Size)) { + bool HasSymbolsDescription = + (IsStatic && Doc.Symbols) || (!IsStatic && Doc.DynamicSymbols); + if (HasSymbolsDescription) { + StringRef Property = (IsStatic ? "`Symbols`" : "`DynamicSymbols`"); + if (RawSec->Content) + reportError("cannot specify both `Content` and " + Property + + " for symbol table section '" + RawSec->Name + "'"); + if (RawSec->Size) + reportError("cannot specify both `Size` and " + Property + + " for symbol table section '" + RawSec->Name + "'"); + return; + } } zero(SHeader); @@ -985,9 +988,19 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, raw_ostream &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + SHeader.sh_info = Section.Info; + + if (Section.Content) { + SHeader.sh_size = writeContent(OS, Section.Content, None); + return; + } + + if (!Section.Entries) + return; + uint64_t AuxCnt = 0; - for (size_t I = 0; I < Section.Entries.size(); ++I) { - const ELFYAML::VerdefEntry &E = Section.Entries[I]; + for (size_t I = 0; I < Section.Entries->size(); ++I) { + const ELFYAML::VerdefEntry &E = (*Section.Entries)[I]; Elf_Verdef VerDef; VerDef.vd_version = E.Version; @@ -996,7 +1009,7 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, VerDef.vd_hash = E.Hash; VerDef.vd_aux = sizeof(Elf_Verdef); VerDef.vd_cnt = E.VerNames.size(); - if (I == Section.Entries.size() - 1) + if (I == Section.Entries->size() - 1) VerDef.vd_next = 0; else VerDef.vd_next = @@ -1014,9 +1027,8 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, } } - SHeader.sh_size = Section.Entries.size() * sizeof(Elf_Verdef) + + SHeader.sh_size = Section.Entries->size() * sizeof(Elf_Verdef) + AuxCnt * sizeof(Elf_Verdaux); - SHeader.sh_info = Section.Info; } template @@ -1027,15 +1039,24 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, typedef typename ELFT::Vernaux Elf_Vernaux; auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + SHeader.sh_info = Section.Info; + + if (Section.Content) { + SHeader.sh_size = writeContent(OS, 
Section.Content, None); + return; + } + + if (!Section.VerneedV) + return; uint64_t AuxCnt = 0; - for (size_t I = 0; I < Section.VerneedV.size(); ++I) { - const ELFYAML::VerneedEntry &VE = Section.VerneedV[I]; + for (size_t I = 0; I < Section.VerneedV->size(); ++I) { + const ELFYAML::VerneedEntry &VE = (*Section.VerneedV)[I]; Elf_Verneed VerNeed; VerNeed.vn_version = VE.Version; VerNeed.vn_file = DotDynstr.getOffset(VE.File); - if (I == Section.VerneedV.size() - 1) + if (I == Section.VerneedV->size() - 1) VerNeed.vn_next = 0; else VerNeed.vn_next = @@ -1060,9 +1081,8 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, } } - SHeader.sh_size = Section.VerneedV.size() * sizeof(Elf_Verneed) + + SHeader.sh_size = Section.VerneedV->size() * sizeof(Elf_Verneed) + AuxCnt * sizeof(Elf_Vernaux); - SHeader.sh_info = Section.Info; } template @@ -1317,7 +1337,8 @@ template void ELFState::buildSymbolIndexes() { if (Doc.Symbols) Build(*Doc.Symbols, SymN2I); - Build(Doc.DynamicSymbols, DynSymN2I); + if (Doc.DynamicSymbols) + Build(*Doc.DynamicSymbols, DynSymN2I); } template void ELFState::finalizeStrings() { @@ -1328,22 +1349,26 @@ template void ELFState::finalizeStrings() { DotStrtab.finalize(); // Add the dynamic symbol names to .dynstr section. - for (const ELFYAML::Symbol &Sym : Doc.DynamicSymbols) - DotDynstr.add(ELFYAML::dropUniqueSuffix(Sym.Name)); + if (Doc.DynamicSymbols) + for (const ELFYAML::Symbol &Sym : *Doc.DynamicSymbols) + DotDynstr.add(ELFYAML::dropUniqueSuffix(Sym.Name)); // SHT_GNU_verdef and SHT_GNU_verneed sections might also // add strings to .dynstr section. for (const ELFYAML::Chunk *Sec : Doc.getSections()) { if (auto VerNeed = dyn_cast(Sec)) { - for (const ELFYAML::VerneedEntry &VE : VerNeed->VerneedV) { - DotDynstr.add(VE.File); - for (const ELFYAML::VernauxEntry &Aux : VE.AuxV) - DotDynstr.add(Aux.Name); + if (VerNeed->VerneedV) { + for (const ELFYAML::VerneedEntry &VE : *VerNeed->VerneedV) { + DotDynstr.add(VE.File); + for (const ELFYAML::VernauxEntry &Aux : VE.AuxV) + DotDynstr.add(Aux.Name); + } } } else if (auto VerDef = dyn_cast(Sec)) { - for (const ELFYAML::VerdefEntry &E : VerDef->Entries) - for (StringRef Name : E.VerNames) - DotDynstr.add(Name); + if (VerDef->Entries) + for (const ELFYAML::VerdefEntry &E : *VerDef->Entries) + for (StringRef Name : E.VerNames) + DotDynstr.add(Name); } } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index a5e5894af04d4..c8de7a662fc18 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -54,6 +54,7 @@ void ScalarEnumerationTraits::enumeration( ECase(PT_GNU_EH_FRAME); ECase(PT_GNU_STACK); ECase(PT_GNU_RELRO); + ECase(PT_GNU_PROPERTY); #undef ECase IO.enumFallback(Value); } @@ -1074,7 +1075,8 @@ static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) { static void sectionMapping(IO &IO, ELFYAML::VerdefSection &Section) { commonSectionMapping(IO, Section); IO.mapRequired("Info", Section.Info); - IO.mapRequired("Entries", Section.Entries); + IO.mapOptional("Entries", Section.Entries); + IO.mapOptional("Content", Section.Content); } static void sectionMapping(IO &IO, ELFYAML::SymverSection &Section) { @@ -1085,7 +1087,8 @@ static void sectionMapping(IO &IO, ELFYAML::SymverSection &Section) { static void sectionMapping(IO &IO, ELFYAML::VerneedSection &Section) { commonSectionMapping(IO, Section); IO.mapRequired("Info", Section.Info); - IO.mapRequired("Dependencies", Section.VerneedV); + IO.mapOptional("Dependencies", Section.VerneedV); + IO.mapOptional("Content", 
Section.Content); } static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) { @@ -1419,6 +1422,20 @@ StringRef MappingTraits>::validate( return {}; } + if (const auto *VD = dyn_cast(C.get())) { + if (VD->Entries && VD->Content) + return "SHT_GNU_verdef: \"Entries\" and \"Content\" can't be used " + "together"; + return {}; + } + + if (const auto *VD = dyn_cast(C.get())) { + if (VD->VerneedV && VD->Content) + return "SHT_GNU_verneed: \"Dependencies\" and \"Content\" can't be used " + "together"; + return {}; + } + return {}; } diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index d988506b5e980..8b583bde5909c 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -240,6 +240,7 @@ FUNCTION_PASS("verify", LoopVerifierPass()) FUNCTION_PASS("verify", MemorySSAVerifierPass()) FUNCTION_PASS("verify", RegionInfoVerifierPass()) FUNCTION_PASS("verify", SafepointIRVerifierPass()) +FUNCTION_PASS("verify", ScalarEvolutionVerifierPass()) FUNCTION_PASS("view-cfg", CFGViewerPass()) FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass()) FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 92e0f5b221048..1f424075d47fd 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -24,11 +24,13 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" @@ -37,9 +39,11 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include #include +#include using namespace llvm; using namespace cl; @@ -1043,14 +1047,16 @@ static bool hasUTF8ByteOrderMark(ArrayRef S) { return (S.size() >= 3 && S[0] == '\xef' && S[1] == '\xbb' && S[2] == '\xbf'); } -static bool ExpandResponseFile(StringRef FName, StringSaver &Saver, - TokenizerCallback Tokenizer, - SmallVectorImpl &NewArgv, - bool MarkEOLs, bool RelativeNames) { - ErrorOr> MemBufOrErr = - MemoryBuffer::getFile(FName); +// FName must be an absolute path. 
+static llvm::Error ExpandResponseFile( + StringRef FName, StringSaver &Saver, TokenizerCallback Tokenizer, + SmallVectorImpl &NewArgv, bool MarkEOLs, bool RelativeNames, + llvm::vfs::FileSystem &FS) { + assert(sys::path::is_absolute(FName)); + llvm::ErrorOr> MemBufOrErr = + FS.getBufferForFile(FName); if (!MemBufOrErr) - return false; + return llvm::errorCodeToError(MemBufOrErr.getError()); MemoryBuffer &MemBuf = *MemBufOrErr.get(); StringRef Str(MemBuf.getBufferStart(), MemBuf.getBufferSize()); @@ -1059,7 +1065,8 @@ static bool ExpandResponseFile(StringRef FName, StringSaver &Saver, std::string UTF8Buf; if (hasUTF16ByteOrderMark(BufRef)) { if (!convertUTF16ToUTF8String(BufRef, UTF8Buf)) - return false; + return llvm::createStringError(std::errc::illegal_byte_sequence, + "Could not convert UTF16 to UTF8"); Str = StringRef(UTF8Buf); } // If we see UTF-8 BOM sequence at the beginning of a file, we shall remove @@ -1071,41 +1078,40 @@ static bool ExpandResponseFile(StringRef FName, StringSaver &Saver, // Tokenize the contents into NewArgv. Tokenizer(Str, Saver, NewArgv, MarkEOLs); + if (!RelativeNames) + return Error::success(); + llvm::StringRef BasePath = llvm::sys::path::parent_path(FName); // If names of nested response files should be resolved relative to including // file, replace the included response file names with their full paths // obtained by required resolution. - if (RelativeNames) - for (unsigned I = 0; I < NewArgv.size(); ++I) - if (NewArgv[I]) { - StringRef Arg = NewArgv[I]; - if (Arg.front() == '@') { - StringRef FileName = Arg.drop_front(); - if (llvm::sys::path::is_relative(FileName)) { - SmallString<128> ResponseFile; - ResponseFile.append(1, '@'); - if (llvm::sys::path::is_relative(FName)) { - SmallString<128> curr_dir; - llvm::sys::fs::current_path(curr_dir); - ResponseFile.append(curr_dir.str()); - } - llvm::sys::path::append( - ResponseFile, llvm::sys::path::parent_path(FName), FileName); - NewArgv[I] = Saver.save(ResponseFile.c_str()).data(); - } - } - } + for (auto &Arg : NewArgv) { + // Skip non-rsp file arguments. + if (!Arg || Arg[0] != '@') + continue; - return true; + StringRef FileName(Arg + 1); + // Skip if non-relative. + if (!llvm::sys::path::is_relative(FileName)) + continue; + + SmallString<128> ResponseFile; + ResponseFile.push_back('@'); + ResponseFile.append(BasePath); + llvm::sys::path::append(ResponseFile, FileName); + Arg = Saver.save(ResponseFile.c_str()).data(); + } + return Error::success(); } /// Expand response files on a command line recursively using the given /// StringSaver and tokenization strategy. bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, - SmallVectorImpl &Argv, - bool MarkEOLs, bool RelativeNames) { + SmallVectorImpl &Argv, bool MarkEOLs, + bool RelativeNames, llvm::vfs::FileSystem &FS, + llvm::Optional CurrentDir) { bool AllExpanded = true; struct ResponseFileRecord { - const char *File; + std::string File; size_t End; }; @@ -1139,8 +1145,31 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, } const char *FName = Arg + 1; - auto IsEquivalent = [FName](const ResponseFileRecord &RFile) { - return sys::fs::equivalent(RFile.File, FName); + // Note that CurrentDir is only used for top-level rsp files, the rest will + // always have an absolute path deduced from the containing file. 
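// ---- Editor's aside (illustrative sketch, not part of the patch) ----
// Expansion is now virtualizable: callers can supply a vfs::FileSystem plus
// a base directory against which relative top-level response files are
// resolved. A sketch using an in-memory file system (helper names outside
// the hunks above are assumptions):
llvm::BumpPtrAllocator Alloc;
llvm::StringSaver Saver(Alloc);
llvm::SmallVector<const char *, 4> Argv = {"prog", "@args.rsp"};
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS(
    new llvm::vfs::InMemoryFileSystem());
FS->addFile("/work/args.rsp", /*ModificationTime=*/0,
            llvm::MemoryBuffer::getMemBuffer("-O2 -g"));
cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv,
                        /*MarkEOLs=*/false, /*RelativeNames=*/false, *FS,
                        /*CurrentDir=*/llvm::StringRef("/work"));
// Argv is now {"prog", "-O2", "-g"}.
// ----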
+ SmallString<128> CurrDir; + if (llvm::sys::path::is_relative(FName)) { + if (!CurrentDir) + llvm::sys::fs::current_path(CurrDir); + else + CurrDir = *CurrentDir; + llvm::sys::path::append(CurrDir, FName); + FName = CurrDir.c_str(); + } + auto IsEquivalent = [FName, &FS](const ResponseFileRecord &RFile) { + llvm::ErrorOr LHS = FS.status(FName); + if (!LHS) { + // TODO: The error should be propagated up the stack. + llvm::consumeError(llvm::errorCodeToError(LHS.getError())); + return false; + } + llvm::ErrorOr RHS = FS.status(RFile.File); + if (!RHS) { + // TODO: The error should be propagated up the stack. + llvm::consumeError(llvm::errorCodeToError(RHS.getError())); + return false; + } + return LHS->equivalent(*RHS); }; // Check for recursive response files. @@ -1155,10 +1184,13 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, // Replace this response file argument with the tokenization of its // contents. Nested response files are expanded in subsequent iterations. SmallVector ExpandedArgv; - if (!ExpandResponseFile(FName, Saver, Tokenizer, ExpandedArgv, MarkEOLs, - RelativeNames)) { + if (llvm::Error Err = + ExpandResponseFile(FName, Saver, Tokenizer, ExpandedArgv, MarkEOLs, + RelativeNames, FS)) { // We couldn't read this file, so we leave it in the argument stream and // move on. + // TODO: The error should be propagated up the stack. + llvm::consumeError(std::move(Err)); AllExpanded = false; ++I; continue; @@ -1186,9 +1218,20 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver, SmallVectorImpl &Argv) { - if (!ExpandResponseFile(CfgFile, Saver, cl::tokenizeConfigFile, Argv, - /*MarkEOLs*/ false, /*RelativeNames*/ true)) + SmallString<128> AbsPath; + if (sys::path::is_relative(CfgFile)) { + llvm::sys::fs::current_path(AbsPath); + llvm::sys::path::append(AbsPath, CfgFile); + CfgFile = AbsPath.str(); + } + if (llvm::Error Err = + ExpandResponseFile(CfgFile, Saver, cl::tokenizeConfigFile, Argv, + /*MarkEOLs*/ false, /*RelativeNames*/ true, + *llvm::vfs::getRealFileSystem())) { + // TODO: The error should be propagated up the stack. + llvm::consumeError(std::move(Err)); return false; + } return ExpandResponseFiles(Saver, cl::tokenizeConfigFile, Argv, /*MarkEOLs*/ false, /*RelativeNames*/ true); } diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 7e07b8f7ca264..ef38c1c09413a 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -140,6 +140,9 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { .Case("POWER8E", "pwr8") .Case("POWER8NVL", "pwr8") .Case("POWER9", "pwr9") + // FIXME: If we get a simulator or machine with the capabilities of + // mcpu=future, we should revisit this and add the name reported by the + // simulator/machine. .Default(generic); } diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index a6c591fca3121..8f3f4aa8caeaf 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -21,8 +21,8 @@ static KnownBits computeForAddCarry( assert(!(CarryZero && CarryOne) && "Carry can't be zero and one at the same time"); - APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + !CarryZero; - APInt PossibleSumOne = LHS.One + RHS.One + CarryOne; + APInt PossibleSumZero = LHS.getMaxValue() + RHS.getMaxValue() + !CarryZero; + APInt PossibleSumOne = LHS.getMinValue() + RHS.getMinValue() + CarryOne; // Compute known bits of the carry. 
APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero); diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp index 14def83802daf..3c9a08cb4077d 100644 --- a/llvm/lib/Support/Path.cpp +++ b/llvm/lib/Support/Path.cpp @@ -496,27 +496,50 @@ void replace_extension(SmallVectorImpl &path, const Twine &extension, path.append(ext.begin(), ext.end()); } -void replace_path_prefix(SmallVectorImpl &Path, +bool replace_path_prefix(SmallVectorImpl &Path, const StringRef &OldPrefix, const StringRef &NewPrefix, - Style style) { + Style style, bool strict) { if (OldPrefix.empty() && NewPrefix.empty()) - return; + return false; StringRef OrigPath(Path.begin(), Path.size()); - if (!OrigPath.startswith(OldPrefix)) - return; + StringRef OldPrefixDir; + + if (!strict && OldPrefix.size() > OrigPath.size()) + return false; + + // Ensure OldPrefixDir does not have a trailing separator. + if (!OldPrefix.empty() && is_separator(OldPrefix.back())) + OldPrefixDir = parent_path(OldPrefix, style); + else + OldPrefixDir = OldPrefix; + + if (!OrigPath.startswith(OldPrefixDir)) + return false; + + if (OrigPath.size() > OldPrefixDir.size()) + if (!is_separator(OrigPath[OldPrefixDir.size()], style) && strict) + return false; // If prefixes have the same size we can simply copy the new one over. - if (OldPrefix.size() == NewPrefix.size()) { + if (OldPrefixDir.size() == NewPrefix.size() && !strict) { llvm::copy(NewPrefix, Path.begin()); - return; + return true; } - StringRef RelPath = OrigPath.substr(OldPrefix.size()); + StringRef RelPath = OrigPath.substr(OldPrefixDir.size()); SmallString<256> NewPath; path::append(NewPath, style, NewPrefix); - path::append(NewPath, style, RelPath); + if (!RelPath.empty()) { + if (!is_separator(RelPath[0], style) || !strict) + path::append(NewPath, style, RelPath); + else + path::append(NewPath, style, relative_path(RelPath, style)); + } + Path.swap(NewPath); + + return true; } void native(const Twine &path, SmallVectorImpl &result, Style style) { diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index c3d742388aafa..6c993387e59d8 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -13,8 +13,8 @@ #include "llvm/Support/TimeProfiler.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" #include "llvm/Support/JSON.h" +#include "llvm/Support/Path.h" #include #include #include @@ -33,14 +33,14 @@ typedef std::pair NameAndCountAndDurationType; struct Entry { - TimePointType Start; + const TimePointType Start; TimePointType End; - std::string Name; - std::string Detail; + const std::string Name; + const std::string Detail; Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), - Detail(std::move(Dt)){}; + Detail(std::move(Dt)) {} // Calculate timings for FlameGraph. Cast time points to microsecond precision // rather than casting duration. 
This avoids truncation issues causing inner @@ -59,10 +59,9 @@ struct Entry { }; struct TimeTraceProfiler { - TimeTraceProfiler(unsigned TimeTraceGranularity = 0) - : TimeTraceGranularity(TimeTraceGranularity) { - StartTime = steady_clock::now(); - } + TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "") + : StartTime(steady_clock::now()), ProcName(ProcName), + TimeTraceGranularity(TimeTraceGranularity) {} void begin(std::string Name, llvm::function_ref<std::string()> Detail) { Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), @@ -169,7 +168,7 @@ struct TimeTraceProfiler { J.attribute("ts", 0); J.attribute("ph", "M"); J.attribute("name", "process_name"); - J.attributeObject("args", [&] { J.attribute("name", "clang"); }); + J.attributeObject("args", [&] { J.attribute("name", ProcName); }); }); J.arrayEnd(); @@ -180,16 +179,19 @@ struct TimeTraceProfiler { SmallVector<Entry, 16> Stack; SmallVector<Entry, 128> Entries; StringMap<CountAndDurationType> CountAndTotalPerName; - TimePointType StartTime; + const TimePointType StartTime; + const std::string ProcName; // Minimum time granularity (in microseconds) - unsigned TimeTraceGranularity; + const unsigned TimeTraceGranularity; }; -void timeTraceProfilerInitialize(unsigned TimeTraceGranularity) { +void timeTraceProfilerInitialize(unsigned TimeTraceGranularity, + StringRef ProcName) { assert(TimeTraceProfilerInstance == nullptr && "Profiler should not be initialized"); - TimeTraceProfilerInstance = new TimeTraceProfiler(TimeTraceGranularity); + TimeTraceProfilerInstance = new TimeTraceProfiler( + TimeTraceGranularity, llvm::sys::path::filename(ProcName)); } void timeTraceProfilerCleanup() { diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc index 8c26fa9b8f29e..2b31672670c89 100644 --- a/llvm/lib/Support/Unix/Signals.inc +++ b/llvm/lib/Support/Unix/Signals.inc @@ -361,16 +361,16 @@ static RETSIGTYPE SignalHandler(int Sig) { { RemoveFilesToRemove(); + if (Sig == SIGPIPE) + if (auto OldOneShotPipeFunction = + OneShotPipeSignalFunction.exchange(nullptr)) + return OldOneShotPipeFunction(); + if (std::find(std::begin(IntSigs), std::end(IntSigs), Sig) != std::end(IntSigs)) { if (auto OldInterruptFunction = InterruptFunction.exchange(nullptr)) return OldInterruptFunction(); - if (Sig == SIGPIPE) - if (auto OldOneShotPipeFunction = - OneShotPipeSignalFunction.exchange(nullptr)) - return OldOneShotPipeFunction(); - raise(Sig); // Execute the default handler. return; } diff --git a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp index da156d6084178..054ef8f482ca9 100644 --- a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -352,8 +352,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { } // Check for flag reads and clobbers.
- MIOperands::PhysRegInfo PRI = - MIOperands(*I).analyzePhysReg(AArch64::NZCV, TRI); + PhysRegInfo PRI = AnalyzePhysRegInBundle(*I, AArch64::NZCV, TRI); if (PRI.Read) { // The ccmp doesn't produce exactly the same flags as the original diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index eca9b1e75c2ac..8f88198203d74 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -452,9 +452,6 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( const AArch64Subtarget &Subtarget = MF.getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - if (MF.getFunction().hasOptSize()) - return false; - if (AFI->getLocalStackSize() == 0) return false; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9e8df33218b88..db00f81e53eda 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -828,6 +828,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, if (isTypeLegal(VT) && VT.getVectorElementType() != MVT::i1) setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); } + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); } PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); @@ -1333,6 +1335,14 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO"; case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI"; case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO"; + case AArch64ISD::INSR: return "AArch64ISD::INSR"; + case AArch64ISD::GLD1: return "AArch64ISD::GLD1"; + case AArch64ISD::GLD1_SCALED: return "AArch64ISD::GLD1_SCALED"; + case AArch64ISD::GLD1_SXTW: return "AArch64ISD::GLD1_SXTW"; + case AArch64ISD::GLD1_UXTW: return "AArch64ISD::GLD1_UXTW"; + case AArch64ISD::GLD1_SXTW_SCALED: return "AArch64ISD::GLD1_SXTW_SCALED"; + case AArch64ISD::GLD1_UXTW_SCALED: return "AArch64ISD::GLD1_UXTW_SCALED"; + case AArch64ISD::GLD1_IMM: return "AArch64ISD::GLD1_IMM"; } return nullptr; } @@ -2884,6 +2894,16 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(), Op.getOperand(1)); + case Intrinsic::aarch64_sve_insr: { + SDValue Scalar = Op.getOperand(2); + EVT ScalarTy = Scalar.getValueType(); + if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) + Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar); + + return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(), + Op.getOperand(1), Scalar); + } + case Intrinsic::localaddress: { const auto &MF = DAG.getMachineFunction(); const auto *RegInfo = Subtarget->getRegisterInfo(); @@ -11747,6 +11767,85 @@ static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG, DAG.getConstant(MinOffset, DL, MVT::i64)); } +// Returns an SVE type that ContentTy can be trivially sign or zero extended +// into. 
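+// (Editor's note, illustrative and not part of the patch: per the switch
+// below, e.g. nxv4i16 lives in an nxv4i32 container and nxv2f32 in an
+// nxv2i64 container. SVE gather loads always fill a vector of full 32-bit
+// (.s) or 64-bit (.d) elements, which is then truncated or bitcast back to
+// the requested element type by the combine further down.)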
+static MVT getSVEContainerType(EVT ContentTy) { + assert(ContentTy.isSimple() && "No SVE containers for extended types"); + + switch (ContentTy.getSimpleVT().SimpleTy) { + default: + llvm_unreachable("No known SVE container for this MVT type"); + case MVT::nxv2i8: + case MVT::nxv2i16: + case MVT::nxv2i32: + case MVT::nxv2i64: + case MVT::nxv2f32: + case MVT::nxv2f64: + return MVT::nxv2i64; + case MVT::nxv4i8: + case MVT::nxv4i16: + case MVT::nxv4i32: + case MVT::nxv4f32: + return MVT::nxv4i32; + } +} + +static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG, + unsigned Opcode) { + EVT RetVT = N->getValueType(0); + assert(RetVT.isScalableVector() && + "Gather loads are only possible for SVE vectors"); + + SDLoc DL(N); + MVT RetElVT = RetVT.getVectorElementType().getSimpleVT(); + unsigned NumElements = AArch64::SVEBitsPerBlock / RetElVT.getSizeInBits(); + + EVT MaxVT = llvm::MVT::getScalableVectorVT(RetElVT, NumElements); + if (RetVT.getSizeInBits().getKnownMinSize() > + MaxVT.getSizeInBits().getKnownMinSize()) + return SDValue(); + + // Depending on the addressing mode, this is either a pointer or a vector of + // pointers (that fits into one register) + const SDValue Base = N->getOperand(3); + // Depending on the addressing mode, this is either a single offset or a + // vector of offsets (that fits into one register) + const SDValue Offset = N->getOperand(4); + + if (!DAG.getTargetLoweringInfo().isTypeLegal(Base.getValueType()) || + !DAG.getTargetLoweringInfo().isTypeLegal(Offset.getValueType())) + return SDValue(); + + // Return value type that is representable in hardware + EVT HwRetVt = getSVEContainerType(RetVT); + + // Keep the original output value type around - this will better inform + // optimisations (e.g. instruction folding when load is followed by + // zext/sext). This will only be used for ints, so the value for FPs + // doesn't matter. + SDValue OutVT = DAG.getValueType(RetVT); + if (RetVT.isFloatingPoint()) + OutVT = DAG.getValueType(HwRetVt); + + SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other); + SDValue Ops[] = {N->getOperand(0), // Chain + N->getOperand(2), // Pg + Base, Offset, OutVT}; + + SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops); + SDValue LoadChain = SDValue(Load.getNode(), 1); + + if (RetVT.isInteger() && (RetVT != HwRetVt)) + Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0)); + + // If the original return value was FP, bitcast accordingly. Doing it here + // means that we can avoid adding TableGen patterns for FPs. 
+ if (RetVT.isFloatingPoint()) + Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0)); + + return DAG.getMergeValues({Load, LoadChain}, DL); +} + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -11833,6 +11932,20 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, case Intrinsic::aarch64_neon_st3lane: case Intrinsic::aarch64_neon_st4lane: return performNEONPostLDSTCombine(N, DCI, DAG); + case Intrinsic::aarch64_sve_ld1_gather: + return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1); + case Intrinsic::aarch64_sve_ld1_gather_index: + return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SCALED); + case Intrinsic::aarch64_sve_ld1_gather_sxtw: + return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW); + case Intrinsic::aarch64_sve_ld1_gather_uxtw: + return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW); + case Intrinsic::aarch64_sve_ld1_gather_sxtw_index: + return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED); + case Intrinsic::aarch64_sve_ld1_gather_uxtw_index: + return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED); + case Intrinsic::aarch64_sve_ld1_gather_imm: + return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM); default: break; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 384c7b4456f0a..118ab7f3d25e6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -196,6 +196,17 @@ enum NodeType : unsigned { UUNPKHI, UUNPKLO, + INSR, + + // Unsigned gather loads. + GLD1, + GLD1_SCALED, + GLD1_UXTW, + GLD1_SXTW, + GLD1_UXTW_SCALED, + GLD1_SXTW_SCALED, + GLD1_IMM, + // NEON Load/Store with post-increment base updates LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, LD3post, diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index d6bf9bcd805ca..fee825422ca4f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -358,6 +358,16 @@ def am_indexed7s128 : ComplexPattern; def am_indexedu6s128 : ComplexPattern; def am_indexeds9s128 : ComplexPattern; +def UImmS2XForm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() / 2, SDLoc(N), MVT::i64); +}]>; +def UImmS4XForm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() / 4, SDLoc(N), MVT::i64); +}]>; +def UImmS8XForm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i64); +}]>; + // uimm5sN predicate - True if the immediate is a multiple of N in the range // [0 * N, 32 * N].
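// (Editor's illustration, not in the original patch: for uimm5s2 the
// predicate below accepts the immediates 0, 2, ..., 62, and the new
// UImmS2XForm encodes an accepted immediate by dividing out the scale, so a
// byte offset of 6 is emitted as the instruction field value 3; an odd
// immediate fails the (Imm % 2) == 0 check and never reaches the XForm.)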
def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>; @@ -365,17 +375,20 @@ def UImm5s4Operand : UImmScaledMemoryIndexed<5, 4>; def UImm5s8Operand : UImmScaledMemoryIndexed<5, 8>; def uimm5s2 : Operand, ImmLeaf= 0 && Imm < (32*2) && ((Imm % 2) == 0); }]> { + [{ return Imm >= 0 && Imm < (32*2) && ((Imm % 2) == 0); }], + UImmS2XForm> { let ParserMatchClass = UImm5s2Operand; let PrintMethod = "printImmScale<2>"; } def uimm5s4 : Operand, ImmLeaf= 0 && Imm < (32*4) && ((Imm % 4) == 0); }]> { + [{ return Imm >= 0 && Imm < (32*4) && ((Imm % 4) == 0); }], + UImmS4XForm> { let ParserMatchClass = UImm5s4Operand; let PrintMethod = "printImmScale<4>"; } def uimm5s8 : Operand, ImmLeaf= 0 && Imm < (32*8) && ((Imm % 8) == 0); }]> { + [{ return Imm >= 0 && Imm < (32*8) && ((Imm % 8) == 0); }], + UImmS8XForm> { let ParserMatchClass = UImm5s8Operand; let PrintMethod = "printImmScale<8>"; } @@ -1473,7 +1486,7 @@ multiclass AuthLoad { (!cast(NAME # "indexed") GPR64:$Rt, GPR64sp:$Rn, 0)>; def : InstAlias(NAME # "writeback") GPR64sp:$wback, GPR64:$Rt, 0)>; + (!cast(NAME # "writeback") GPR64sp:$wback, GPR64:$Rt, 0), 0>; } //--- @@ -10407,9 +10420,9 @@ class CryptoRRTiedop0, bits<2>op1, string asm, string asmops> let Inst{11-10} = op1; } class CryptoRRTied_2Dop0, bits<2>op1, string asm> - : CryptoRRTied; + : CryptoRRTied; class CryptoRRTied_4Sop0, bits<2>op1, string asm> - : CryptoRRTied; + : CryptoRRTied; class CryptoRRR op0, bits<2>op1, dag oops, dag iops, string asm, string asmops, string cst> @@ -10424,19 +10437,19 @@ class CryptoRRR op0, bits<2>op1, dag oops, dag iops, string asm, } class CryptoRRR_2D op0, bits<2>op1, string asm> : CryptoRRR; + "{\t$Vd.2d, $Vn.2d, $Vm.2d|.2d\t$Vd, $Vn, $Vm}", "">; class CryptoRRRTied_2D op0, bits<2>op1, string asm> : CryptoRRR; + "{\t$Vd.2d, $Vn.2d, $Vm.2d|.2d\t$Vd, $Vn, $Vm}", "$Vd = $Vdst">; class CryptoRRR_4S op0, bits<2>op1, string asm> : CryptoRRR; + "{\t$Vd.4s, $Vn.4s, $Vm.4s|.4s\t$Vd, $Vn, $Vm}", "">; class CryptoRRRTied_4S op0, bits<2>op1, string asm> : CryptoRRR; + "{\t$Vd.4s, $Vn.4s, $Vm.4s|.4s\t$Vd, $Vn, $Vm}", "$Vd = $Vdst">; class CryptoRRRTied op0, bits<2>op1, string asm> : CryptoRRR; + asm, "{\t$Vd, $Vn, $Vm.2d|.2d\t$Vd, $Vn, $Vm}", "$Vd = $Vdst">; class CryptoRRRRop0, string asm, string asmops> : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, V128:$Va), asm, @@ -10450,15 +10463,18 @@ class CryptoRRRRop0, string asm, string asmops> let Inst{14-10} = Va; } class CryptoRRRR_16Bop0, string asm> - : CryptoRRRR { + : CryptoRRRR { } class CryptoRRRR_4Sop0, string asm> - : CryptoRRRR { + : CryptoRRRR { } class CryptoRRRi6 : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, uimm6:$imm), asm, - "{\t$Vd.2d, $Vn.2d, $Vm.2d, $imm}", "", []> { + "{\t$Vd.2d, $Vn.2d, $Vm.2d, $imm" # + "|.2d\t$Vd, $Vn, $Vm, $imm}", "", []> { bits<6> imm; bits<5> Vm; let Inst{24-21} = 0b0100; @@ -10471,7 +10487,8 @@ class CryptoRRRi6 class CryptoRRRi2Tiedop0, bits<2>op1, string asm> : BaseCryptoV82<(outs V128:$Vdst), (ins V128:$Vd, V128:$Vn, V128:$Vm, VectorIndexS:$imm), - asm, "{\t$Vd.4s, $Vn.4s, $Vm.s$imm}", "$Vd = $Vdst", []> { + asm, "{\t$Vd.4s, $Vn.4s, $Vm.s$imm" # + "|.4s\t$Vd, $Vn, $Vm$imm}", "$Vd = $Vdst", []> { bits<2> imm; bits<5> Vm; let Inst{24-21} = 0b0010; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 785345422404f..714007f8aba86 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3571,6 +3571,18 @@ static bool 
isCombineInstrCandidate64(unsigned Opc) { // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. case AArch64::SUBXri: case AArch64::SUBSXri: + case AArch64::ADDv8i8: + case AArch64::ADDv16i8: + case AArch64::ADDv4i16: + case AArch64::ADDv8i16: + case AArch64::ADDv2i32: + case AArch64::ADDv4i32: + case AArch64::SUBv8i8: + case AArch64::SUBv16i8: + case AArch64::SUBv4i16: + case AArch64::SUBv8i16: + case AArch64::SUBv2i32: + case AArch64::SUBv4i32: return true; default: break; @@ -3713,6 +3725,13 @@ static bool getMaddPatterns(MachineInstr &Root, } }; + auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) { + if (canCombine(MBB, Root.getOperand(Operand), Opcode)) { + Patterns.push_back(Pattern); + Found = true; + } + }; + typedef MachineCombinerPattern MCP; switch (Opc) { @@ -3748,6 +3767,70 @@ static bool getMaddPatterns(MachineInstr &Root, case AArch64::SUBXri: setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1); break; + case AArch64::ADDv8i8: + setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1); + setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2); + break; + case AArch64::ADDv16i8: + setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1); + setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2); + break; + case AArch64::ADDv4i16: + setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1); + setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2); + setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1); + setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2); + break; + case AArch64::ADDv8i16: + setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1); + setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2); + setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1); + setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2); + break; + case AArch64::ADDv2i32: + setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1); + setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2); + setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1); + setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2); + break; + case AArch64::ADDv4i32: + setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1); + setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2); + setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1); + setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2); + break; + case AArch64::SUBv8i8: + setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1); + setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2); + break; + case AArch64::SUBv16i8: + setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1); + setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2); + break; + case AArch64::SUBv4i16: + setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1); + setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2); + setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1); + setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2); + break; + case AArch64::SUBv8i16: + setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1); + setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2); + setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1); + setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2); + break; + case AArch64::SUBv2i32: + setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1); + setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2); + setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1); + 
setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2); + break; + case AArch64::SUBv4i32: + setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1); + setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2); + setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1); + setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2); + break; } return Found; } @@ -3960,6 +4043,46 @@ bool AArch64InstrInfo::isThroughputPattern( case MachineCombinerPattern::FMLSv2f64_OP2: case MachineCombinerPattern::FMLSv4i32_indexed_OP2: case MachineCombinerPattern::FMLSv4f32_OP2: + case MachineCombinerPattern::MULADDv8i8_OP1: + case MachineCombinerPattern::MULADDv8i8_OP2: + case MachineCombinerPattern::MULADDv16i8_OP1: + case MachineCombinerPattern::MULADDv16i8_OP2: + case MachineCombinerPattern::MULADDv4i16_OP1: + case MachineCombinerPattern::MULADDv4i16_OP2: + case MachineCombinerPattern::MULADDv8i16_OP1: + case MachineCombinerPattern::MULADDv8i16_OP2: + case MachineCombinerPattern::MULADDv2i32_OP1: + case MachineCombinerPattern::MULADDv2i32_OP2: + case MachineCombinerPattern::MULADDv4i32_OP1: + case MachineCombinerPattern::MULADDv4i32_OP2: + case MachineCombinerPattern::MULSUBv8i8_OP1: + case MachineCombinerPattern::MULSUBv8i8_OP2: + case MachineCombinerPattern::MULSUBv16i8_OP1: + case MachineCombinerPattern::MULSUBv16i8_OP2: + case MachineCombinerPattern::MULSUBv4i16_OP1: + case MachineCombinerPattern::MULSUBv4i16_OP2: + case MachineCombinerPattern::MULSUBv8i16_OP1: + case MachineCombinerPattern::MULSUBv8i16_OP2: + case MachineCombinerPattern::MULSUBv2i32_OP1: + case MachineCombinerPattern::MULSUBv2i32_OP2: + case MachineCombinerPattern::MULSUBv4i32_OP1: + case MachineCombinerPattern::MULSUBv4i32_OP2: + case MachineCombinerPattern::MULADDv4i16_indexed_OP1: + case MachineCombinerPattern::MULADDv4i16_indexed_OP2: + case MachineCombinerPattern::MULADDv8i16_indexed_OP1: + case MachineCombinerPattern::MULADDv8i16_indexed_OP2: + case MachineCombinerPattern::MULADDv2i32_indexed_OP1: + case MachineCombinerPattern::MULADDv2i32_indexed_OP2: + case MachineCombinerPattern::MULADDv4i32_indexed_OP1: + case MachineCombinerPattern::MULADDv4i32_indexed_OP2: + case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: + case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: + case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: + case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: + case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: + case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: + case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: + case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: return true; } // end switch (Pattern) return false; @@ -4063,6 +4186,30 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, return MUL; } +/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate +/// instructions. +/// +/// \see genFusedMultiply +static MachineInstr *genFusedMultiplyAcc( + MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, + MachineInstr &Root, SmallVectorImpl &InsInstrs, + unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) { + return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, + FMAInstKind::Accumulator); +} + +/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate +/// instructions. 
+/// +/// \see genFusedMultiply +static MachineInstr *genFusedMultiplyIdx( + MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, + MachineInstr &Root, SmallVectorImpl &InsInstrs, + unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) { + return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, + FMAInstKind::Indexed); +} + /// genMaddR - Generate madd instruction and combine mul and add using /// an extra virtual register /// Example - an ADD intermediate needs to be stored in a register: @@ -4302,6 +4449,211 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; } + + case MachineCombinerPattern::MULADDv8i8_OP1: + Opc = AArch64::MLAv8i8; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULADDv8i8_OP2: + Opc = AArch64::MLAv8i8; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULADDv16i8_OP1: + Opc = AArch64::MLAv16i8; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULADDv16i8_OP2: + Opc = AArch64::MLAv16i8; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULADDv4i16_OP1: + Opc = AArch64::MLAv4i16; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULADDv4i16_OP2: + Opc = AArch64::MLAv4i16; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULADDv8i16_OP1: + Opc = AArch64::MLAv8i16; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULADDv8i16_OP2: + Opc = AArch64::MLAv8i16; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULADDv2i32_OP1: + Opc = AArch64::MLAv2i32; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULADDv2i32_OP2: + Opc = AArch64::MLAv2i32; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULADDv4i32_OP1: + Opc = AArch64::MLAv4i32; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULADDv4i32_OP2: + Opc = AArch64::MLAv4i32; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + + case MachineCombinerPattern::MULSUBv8i8_OP1: + Opc = AArch64::MLSv8i8; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv8i8_OP2: + Opc = AArch64::MLSv8i8; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv16i8_OP1: + Opc = AArch64::MLSv16i8; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv16i8_OP2: + Opc = AArch64::MLSv16i8; + RC = 
&AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv4i16_OP1: + Opc = AArch64::MLSv4i16; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv4i16_OP2: + Opc = AArch64::MLSv4i16; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv8i16_OP1: + Opc = AArch64::MLSv8i16; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv8i16_OP2: + Opc = AArch64::MLSv8i16; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv2i32_OP1: + Opc = AArch64::MLSv2i32; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv2i32_OP2: + Opc = AArch64::MLSv2i32; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv4i32_OP1: + Opc = AArch64::MLSv4i32; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv4i32_OP2: + Opc = AArch64::MLSv4i32; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + + case MachineCombinerPattern::MULADDv4i16_indexed_OP1: + Opc = AArch64::MLAv4i16_indexed; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULADDv4i16_indexed_OP2: + Opc = AArch64::MLAv4i16_indexed; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULADDv8i16_indexed_OP1: + Opc = AArch64::MLAv8i16_indexed; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULADDv8i16_indexed_OP2: + Opc = AArch64::MLAv8i16_indexed; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULADDv2i32_indexed_OP1: + Opc = AArch64::MLAv2i32_indexed; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULADDv2i32_indexed_OP2: + Opc = AArch64::MLAv2i32_indexed; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULADDv4i32_indexed_OP1: + Opc = AArch64::MLAv4i32_indexed; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULADDv4i32_indexed_OP2: + Opc = AArch64::MLAv4i32_indexed; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + + case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: + Opc = AArch64::MLSv4i16_indexed; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: + Opc = AArch64::MLSv4i16_indexed; + RC 
= &AArch64::FPR64RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: + Opc = AArch64::MLSv8i16_indexed; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: + Opc = AArch64::MLSv8i16_indexed; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: + Opc = AArch64::MLSv2i32_indexed; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: + Opc = AArch64::MLSv2i32_indexed; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: + Opc = AArch64::MLSv4i32_indexed; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: + Opc = AArch64::MLSv4i32_indexed; + RC = &AArch64::FPR128RegClass; + MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + // Floating Point Support case MachineCombinerPattern::FMULADDH_OP1: Opc = AArch64::FMADDHrrr; @@ -5060,8 +5412,99 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { return 0u; } -outliner::OutlinedFunction -AArch64InstrInfo::getOutliningCandidateInfo( +static bool +outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, + const outliner::Candidate &b) { + const Function &Fa = a.getMF()->getFunction(); + const Function &Fb = b.getMF()->getFunction(); + + // If none of the functions have the "sign-return-address" attribute their + // signing behaviour is equal + if (!Fa.hasFnAttribute("sign-return-address") && + !Fb.hasFnAttribute("sign-return-address")) { + return true; + } + + // If both functions have the "sign-return-address" attribute their signing + // behaviour is equal, if the values of the attributes are equal + if (Fa.hasFnAttribute("sign-return-address") && + Fb.hasFnAttribute("sign-return-address")) { + StringRef ScopeA = + Fa.getFnAttribute("sign-return-address").getValueAsString(); + StringRef ScopeB = + Fb.getFnAttribute("sign-return-address").getValueAsString(); + return ScopeA.equals(ScopeB); + } + + // If function B doesn't have the "sign-return-address" attribute but A does, + // the functions' signing behaviour is equal if A's value for + // "sign-return-address" is "none" and vice versa. 
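+// (Editor's example, not part of the patch: under this rule a candidate from
+// a function carrying "sign-return-address"="none" may be grouped with one
+// from a function that lacks the attribute altogether; neither will ever
+// sign its return address, so the outlined body needs no PAC/AUT handling.)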
+ if (Fa.hasFnAttribute("sign-return-address")) { + StringRef ScopeA = + Fa.getFnAttribute("sign-return-address").getValueAsString(); + return ScopeA.equals("none"); + } + + if (Fb.hasFnAttribute("sign-return-address")) { + StringRef ScopeB = + Fb.getFnAttribute("sign-return-address").getValueAsString(); + return ScopeB.equals("none"); + } + + llvm_unreachable("Unknown combination of sign-return-address attributes"); +} + +static bool +outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, + const outliner::Candidate &b) { + const Function &Fa = a.getMF()->getFunction(); + const Function &Fb = b.getMF()->getFunction(); + + // If none of the functions have the "sign-return-address-key" attribute + // their keys are equal + if (!Fa.hasFnAttribute("sign-return-address-key") && + !Fb.hasFnAttribute("sign-return-address-key")) { + return true; + } + + // If both functions have the "sign-return-address-key" attribute their + // keys are equal if the values of "sign-return-address-key" are equal + if (Fa.hasFnAttribute("sign-return-address-key") && + Fb.hasFnAttribute("sign-return-address-key")) { + StringRef KeyA = + Fa.getFnAttribute("sign-return-address-key").getValueAsString(); + StringRef KeyB = + Fb.getFnAttribute("sign-return-address-key").getValueAsString(); + return KeyA.equals(KeyB); + } + + // If B doesn't have the "sign-return-address-key" attribute, both keys are + // equal if function a has the default key (a_key) + if (Fa.hasFnAttribute("sign-return-address-key")) { + StringRef KeyA = + Fa.getFnAttribute("sign-return-address-key").getValueAsString(); + return KeyA.equals_lower("a_key"); + } + + if (Fb.hasFnAttribute("sign-return-address-key")) { + StringRef KeyB = + Fb.getFnAttribute("sign-return-address-key").getValueAsString(); + return KeyB.equals_lower("a_key"); + } + + llvm_unreachable("Unknown combination of sign-return-address-key attributes"); +} + +static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, + const outliner::Candidate &b) { + const AArch64Subtarget &SubtargetA = + a.getMF()->getSubtarget<AArch64Subtarget>(); + const AArch64Subtarget &SubtargetB = + b.getMF()->getSubtarget<AArch64Subtarget>(); + return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps(); +} + +outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; unsigned SequenceSize = @@ -5069,12 +5512,99 @@ AArch64InstrInfo::getOutliningCandidateInfo( [this](unsigned Sum, const MachineInstr &MI) { return Sum + getInstSizeInBytes(MI); }); + unsigned NumBytesToCreateFrame = 0; + + // We only allow outlining for functions having exactly matching return + // address signing attributes, i.e., all share the same value for the + // attribute "sign-return-address" and all share the same type of key they + // are signed with. + // Additionally we require all functions to simultaneously either support + // v8.3a features or not. Otherwise an outlined function could get signed + // using dedicated v8.3 instructions and a call from a function that doesn't + // support v8.3 instructions would therefore be invalid. + if (std::adjacent_find( + RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + [](const outliner::Candidate &a, const outliner::Candidate &b) { + // Return true if a and b are non-equal w.r.t.
return address + // signing or support of v8.3a features + if (outliningCandidatesSigningScopeConsensus(a, b) && + outliningCandidatesSigningKeyConsensus(a, b) && + outliningCandidatesV8_3OpsConsensus(a, b)) { + return false; + } + return true; + }) != RepeatedSequenceLocs.end()) { + return outliner::OutlinedFunction(); + } + + // Since at this point all candidates agree on their return address signing, + // picking just one is fine. If the candidate functions potentially sign their + // return addresses, the outlined function should do the same. Note that in + // the case of "sign-return-address"="non-leaf" this is an assumption: it is + // not necessarily true that the outlined function will have to sign its + // return address, but that decision is made later, when the decision to + // outline has already been made. + // The same holds for the number of additional instructions we need: on + // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is + // necessary. However, at this point we don't know if the outlined function + // will have a RET instruction so we assume the worst. + const Function &FCF = FirstCand.getMF()->getFunction(); + const TargetRegisterInfo &TRI = getRegisterInfo(); + if (FCF.hasFnAttribute("sign-return-address")) { + // One PAC and one AUT instruction + NumBytesToCreateFrame += 8; + + // We have to check if sp modifying instructions would get outlined. + // If so we only allow outlining if sp is unchanged overall, so matching + // sub and add instructions are okay to outline; all other sp modifications + // are not + auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) { + int SPValue = 0; + MachineBasicBlock::iterator MBBI = C.front(); + for (;;) { + if (MBBI->modifiesRegister(AArch64::SP, &TRI)) { + switch (MBBI->getOpcode()) { + case AArch64::ADDXri: + case AArch64::ADDWri: + assert(MBBI->getNumOperands() == 4 && "Wrong number of operands"); + assert(MBBI->getOperand(2).isImm() && + "Expected operand to be immediate"); + SPValue += MBBI->getOperand(2).getImm(); + break; + case AArch64::SUBXri: + case AArch64::SUBWri: + assert(MBBI->getNumOperands() == 4 && "Wrong number of operands"); + assert(MBBI->getOperand(2).isImm() && + "Expected operand to be immediate"); + SPValue -= MBBI->getOperand(2).getImm(); + break; + default: + return true; + } + } + if (MBBI == C.back()) + break; + ++MBBI; + } + if (SPValue) + return true; + return false; + }; + // Remove candidates with illegal stack modifying instructions + RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), + RepeatedSequenceLocs.end(), + hasIllegalSPModification), + RepeatedSequenceLocs.end()); + + // If the sequence doesn't have enough candidates left, then we're done. + if (RepeatedSequenceLocs.size() < 2) + return outliner::OutlinedFunction(); + } // Properties about candidate MBBs that hold for all of them. unsigned FlagsSetInAll = 0xF; // Compute liveness information for each candidate, and set FlagsSetInAll.
- const TargetRegisterInfo &TRI = getRegisterInfo(); std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; @@ -5130,7 +5660,7 @@ AArch64InstrInfo::getOutliningCandidateInfo( }; unsigned FrameID = MachineOutlinerDefault; - unsigned NumBytesToCreateFrame = 4; + NumBytesToCreateFrame += 4; bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) { return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement"); @@ -5399,6 +5929,19 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, MachineFunction *MF = MBB->getParent(); AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>(); + // Don't outline anything used for return address signing. The outlined + // function will get signed later if needed + switch (MI.getOpcode()) { + case AArch64::PACIASP: + case AArch64::PACIBSP: + case AArch64::AUTIASP: + case AArch64::AUTIBSP: + case AArch64::RETAA: + case AArch64::RETAB: + case AArch64::EMITBKEY: + return outliner::InstrType::Illegal; + } + // Don't outline LOHs. if (FuncInfo->getLOHRelated().count(&MI)) return outliner::InstrType::Illegal; @@ -5551,6 +6094,59 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { } } +static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, + bool ShouldSignReturnAddr, + bool ShouldSignReturnAddrWithAKey) { + if (ShouldSignReturnAddr) { + MachineBasicBlock::iterator MBBPAC = MBB.begin(); + MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator(); + const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL; + + if (MBBAUT != MBB.end()) + DL = MBBAUT->getDebugLoc(); + + // At the very beginning of the basic block we insert the following + // depending on the key type + // + // a_key: b_key: + // PACIASP EMITBKEY + // CFI_INSTRUCTION PACIBSP + // CFI_INSTRUCTION + if (ShouldSignReturnAddrWithAKey) { + BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP)) + .setMIFlag(MachineInstr::FrameSetup); + } else { + BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY)) + .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP)) + .setMIFlag(MachineInstr::FrameSetup); + } + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); + BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + + // If v8.3a features are available we can replace a RET instruction by + // RETAA or RETAB and omit the AUT instructions + if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() && + MBBAUT->getOpcode() == AArch64::RET) { + BuildMI(MBB, MBBAUT, DL, + TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA + : AArch64::RETAB)) + .copyImplicitOps(*MBBAUT); + MBB.erase(MBBAUT); + } else { + BuildMI(MBB, MBBAUT, DL, + TII->get(ShouldSignReturnAddrWithAKey ?
AArch64::AUTIASP + : AArch64::AUTIBSP)) + .setMIFlag(MachineInstr::FrameDestroy); + } + } +} + void AArch64InstrInfo::buildOutlinedFrame( MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const { @@ -5566,16 +6162,19 @@ void AArch64InstrInfo::buildOutlinedFrame( TailOpcode = AArch64::TCRETURNriALL; } MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode)) - .add(Call->getOperand(0)) - .addImm(0); + .add(Call->getOperand(0)) + .addImm(0); MBB.insert(MBB.end(), TC); Call->eraseFromParent(); } + bool IsLeafFunction = true; + // Is there a call in the outlined range? - auto IsNonTailCall = [](MachineInstr &MI) { + auto IsNonTailCall = [](const MachineInstr &MI) { return MI.isCall() && !MI.isReturn(); }; + if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) { // Fix up the instructions in the range, since we're going to modify the // stack. @@ -5583,6 +6182,8 @@ void AArch64InstrInfo::buildOutlinedFrame( "Can only fix up stack references once"); fixupPostOutline(MBB); + IsLeafFunction = false; + // LR has to be a live in so that we can save it. MBB.addLiveIn(AArch64::LR); @@ -5629,16 +6230,47 @@ void AArch64InstrInfo::buildOutlinedFrame( Et = MBB.insert(Et, LDRXpost); } + // If a bunch of candidates reach this point they must agree on their return + // address signing. It is therefore enough to just consider the signing + // behaviour of one of them + const Function &CF = OF.Candidates.front().getMF()->getFunction(); + bool ShouldSignReturnAddr = false; + if (CF.hasFnAttribute("sign-return-address")) { + StringRef Scope = + CF.getFnAttribute("sign-return-address").getValueAsString(); + if (Scope.equals("all")) + ShouldSignReturnAddr = true; + else if (Scope.equals("non-leaf") && !IsLeafFunction) + ShouldSignReturnAddr = true; + } + + // a_key is the default + bool ShouldSignReturnAddrWithAKey = true; + if (CF.hasFnAttribute("sign-return-address-key")) { + const StringRef Key = + CF.getFnAttribute("sign-return-address-key").getValueAsString(); + // Key can either be a_key or b_key + assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) && + "Return address signing key must be either a_key or b_key"); + ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key"); + } + // If this is a tail call outlined function, then there's already a return. if (OF.FrameConstructionID == MachineOutlinerTailCall || - OF.FrameConstructionID == MachineOutlinerThunk) + OF.FrameConstructionID == MachineOutlinerThunk) { + signOutlinedFunction(MF, MBB, ShouldSignReturnAddr, + ShouldSignReturnAddrWithAKey); return; + } // It's not a tail call, so we have to insert the return ourselves. MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET)) .addReg(AArch64::LR, RegState::Undef); MBB.insert(MBB.end(), ret); + signOutlinedFunction(MF, MBB, ShouldSignReturnAddr, + ShouldSignReturnAddrWithAKey); + // Did we have to modify the stack by saving the link register? 
if (OF.FrameConstructionID != MachineOutlinerDefault) return; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 80cf31ff3d56b..48872dc09cdb5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -214,6 +214,7 @@ def SDT_AArch64FCmp : SDTypeProfile<0, 2, SDTCisSameAs<0, 1>]>; def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; +def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>; def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; @@ -262,15 +263,17 @@ def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, // non-extending masked load fragment. def nonext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, node:$pred, node:$def), [{ - return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD; + (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD && + cast<MaskedLoadSDNode>(N)->isUnindexed(); }]>; // sign extending masked load fragments. def asext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, node:$pred, node:$def),[{ - return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD || - cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; + (masked_ld node:$ptr, undef, node:$pred, node:$def),[{ + return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD || + cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD) && + cast<MaskedLoadSDNode>(N)->isUnindexed(); }]>; def asext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), @@ -290,8 +293,9 @@ def asext_masked_load_i32 : // zero extending masked load fragments. def zext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, node:$pred, node:$def), [{ - return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; + (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD && + cast<MaskedLoadSDNode>(N)->isUnindexed(); }]>; def zext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), @@ -312,14 +316,16 @@ def zext_masked_load_i32 : // non-truncating masked store fragment. def nontrunc_masked_store : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ - return !cast<MaskedStoreSDNode>(N)->isTruncatingStore(); + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ + return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() && + cast<MaskedStoreSDNode>(N)->isUnindexed(); }]>; // truncating masked store fragments.
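// (Editor's note on the PatFrag changes in this hunk, added for clarity: the
// extra `undef` operand threaded into masked_ld/masked_st is the offset
// operand that the indexed masked load/store nodes introduced, and the new
// isUnindexed() checks keep pre/post-indexed forms from matching these
// unindexed-only patterns.)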
def trunc_masked_store : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ - return cast<MaskedStoreSDNode>(N)->isTruncatingStore(); + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ + return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && + cast<MaskedStoreSDNode>(N)->isUnindexed(); }]>; def trunc_masked_store_i8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), @@ -396,6 +402,8 @@ def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; +def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; + def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; @@ -752,6 +760,29 @@ defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla", null_frag>; +let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { + def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), + (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>; + def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), + (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>; + def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), + (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>; + def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), + (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>; +} +let Predicates = [HasComplxNum, HasNEON] in { + def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), + (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>; + def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), + (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>; + foreach Ty = [v4f32, v2f64] in { + def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))), + (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>; + def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))), + (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>; + } +} + // v8.3a Pointer Authentication // These instructions inhabit part of the hint space and so can be used for // armv8 targets @@ -3793,10 +3824,11 @@ defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; -defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", - TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; -defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", - TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >; + +// MLA and MLS are generated in MachineCombine +defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>; +defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>; + defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", @@ -5526,10 +5558,11 @@ def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), defm SQDMULH : SIMDIndexedHS<0, 0b1100,
"sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; -defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", - TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>; -defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", - TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>; + +// Generated by MachineCombine +defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>; +defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>; + defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index a4ea2cab13eba..c75208e4aaca6 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -10,6 +10,24 @@ // //===----------------------------------------------------------------------===// +def SDT_AArch64_GLD1 : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>, + SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> +]>; + +def SDT_AArch64_GLD1_IMM : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>, + SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> +]>; + +def AArch64ld1_gather : SDNode<"AArch64ISD::GLD1", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; +def AArch64ld1_gather_scaled : SDNode<"AArch64ISD::GLD1_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; +def AArch64ld1_gather_uxtw : SDNode<"AArch64ISD::GLD1_UXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; +def AArch64ld1_gather_sxtw : SDNode<"AArch64ISD::GLD1_SXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; +def AArch64ld1_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1_UXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; +def AArch64ld1_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1_SXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; +def AArch64ld1_gather_imm : SDNode<"AArch64ISD::GLD1_IMM", SDT_AArch64_GLD1_IMM, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; + let Predicates = [HasSVE] in { def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr">; @@ -28,7 +46,7 @@ let Predicates = [HasSVE] in { defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and", and>; defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr", or>; defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor", xor>; - defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", int_aarch64_sve_bic>; + defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", int_aarch64_sve_bic_base>; defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", int_aarch64_sve_add>; defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", int_aarch64_sve_sub>; @@ -37,7 +55,7 @@ let Predicates = [HasSVE] in { defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_or>; defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_xor>; defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>; - defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", int_aarch64_sve_bic_pred>; + defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", int_aarch64_sve_bic>; defm ADD_ZI : sve_int_arith_imm0<0b000, "add">; defm SUB_ZI : sve_int_arith_imm0<0b001, "sub">; @@ -199,14 +217,14 @@ let Predicates = [HasSVE] in { defm SPLICE_ZPZ : sve_int_perm_splice<"splice">; 
defm COMPACT_ZPZ : sve_int_perm_compact<"compact">; - defm INSR_ZR : sve_int_perm_insrs<"insr">; - defm INSR_ZV : sve_int_perm_insrv<"insr">; + defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>; + defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>; def EXT_ZZI : sve_int_perm_extract_i<"ext">; - defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit">; - defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb">; - defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh">; - defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw">; + defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", int_aarch64_sve_rbit>; + defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", int_aarch64_sve_revb, bswap>; + defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>; + defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>; defm REV_PP : sve_int_perm_reverse_p<"rev">; defm REV_ZZ : sve_int_perm_reverse_z<"rev">; @@ -244,21 +262,21 @@ let Predicates = [HasSVE] in { defm PFIRST : sve_int_pfirst<0b00000, "pfirst">; defm PNEXT : sve_int_pnext<0b00110, "pnext">; - def AND_PPzPP : sve_int_pred_log<0b0000, "and">; - def BIC_PPzPP : sve_int_pred_log<0b0001, "bic">; - def EOR_PPzPP : sve_int_pred_log<0b0010, "eor">; - def SEL_PPPP : sve_int_pred_log<0b0011, "sel">; - def ANDS_PPzPP : sve_int_pred_log<0b0100, "ands">; - def BICS_PPzPP : sve_int_pred_log<0b0101, "bics">; - def EORS_PPzPP : sve_int_pred_log<0b0110, "eors">; - def ORR_PPzPP : sve_int_pred_log<0b1000, "orr">; - def ORN_PPzPP : sve_int_pred_log<0b1001, "orn">; - def NOR_PPzPP : sve_int_pred_log<0b1010, "nor">; - def NAND_PPzPP : sve_int_pred_log<0b1011, "nand">; - def ORRS_PPzPP : sve_int_pred_log<0b1100, "orrs">; - def ORNS_PPzPP : sve_int_pred_log<0b1101, "orns">; - def NORS_PPzPP : sve_int_pred_log<0b1110, "nors">; - def NANDS_PPzPP : sve_int_pred_log<0b1111, "nands">; + defm AND_PPzPP : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and>; + defm BIC_PPzPP : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic>; + defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor>; + defm SEL_PPPP : sve_int_pred_log<0b0011, "sel", vselect>; + defm ANDS_PPzPP : sve_int_pred_log<0b0100, "ands", int_aarch64_sve_ands>; + defm BICS_PPzPP : sve_int_pred_log<0b0101, "bics", int_aarch64_sve_bics>; + defm EORS_PPzPP : sve_int_pred_log<0b0110, "eors", int_aarch64_sve_eors>; + defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr>; + defm ORN_PPzPP : sve_int_pred_log<0b1001, "orn", int_aarch64_sve_orn>; + defm NOR_PPzPP : sve_int_pred_log<0b1010, "nor", int_aarch64_sve_nor>; + defm NAND_PPzPP : sve_int_pred_log<0b1011, "nand", int_aarch64_sve_nand>; + defm ORRS_PPzPP : sve_int_pred_log<0b1100, "orrs", int_aarch64_sve_orrs>; + defm ORNS_PPzPP : sve_int_pred_log<0b1101, "orns", int_aarch64_sve_orns>; + defm NORS_PPzPP : sve_int_pred_log<0b1110, "nors", int_aarch64_sve_nors>; + defm NANDS_PPzPP : sve_int_pred_log<0b1111, "nands", int_aarch64_sve_nands>; defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta">; defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb">; @@ -402,115 +420,115 @@ let Predicates = [HasSVE] in { // Gathers using unscaled 32-bit offsets, e.g. 
// ld1h z0.s, p0/z, [x0, z0.s, uxtw] - defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; - defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; - defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; - defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; - defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; - defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; - defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; - defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; - defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; - defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", null_frag, null_frag, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; + defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", null_frag, null_frag, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; + defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; + defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", null_frag, null_frag, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; + defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; + defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; + defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; + defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; + defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; + defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; // Gathers using scaled 32-bit offsets, e.g. // ld1h z0.s, p0/z, [x0, z0.s, uxtw #1] - defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; - defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; - defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; - defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; - defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>; - defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>; - - // Gathers using scaled 32-bit pointers with offset, e.g. 
+ defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", null_frag, null_frag, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; + defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; + defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; + defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", null_frag, null_frag, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; + defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; + defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", null_frag, null_frag, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; + + // Gathers using 32-bit pointers with scaled offset, e.g. // ld1h z0.s, p0/z, [z0.s, #16] - defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31>; - defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31>; - defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31>; - defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31>; - defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2>; - defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2>; - defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2>; - defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2>; - defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4>; - defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4>; - - // Gathers using scaled 64-bit pointers with offset, e.g. + defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, null_frag, nxv4i8>; + defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, null_frag, nxv4i8>; + defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv4i8>; + defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, null_frag, nxv4i8>; + defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, null_frag, nxv4i16>; + defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, null_frag, nxv4i16>; + defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv4i16>; + defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, null_frag, nxv4i16>; + defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv4i32>; + defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, null_frag, nxv4i32>; + + // Gathers using 64-bit pointers with scaled offset, e.g. 
// ld1h z0.d, p0/z, [z0.d, #16] - defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31>; - defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31>; - defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31>; - defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31>; - defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2>; - defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2>; - defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2>; - defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2>; - defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4>; - defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4>; - defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4>; - defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4>; - defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8>; - defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8>; + defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, null_frag, nxv2i8>; + defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, null_frag, nxv2i8>; + defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv2i8>; + defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, null_frag, nxv2i8>; + defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, null_frag, nxv2i16>; + defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, null_frag, nxv2i16>; + defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv2i16>; + defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, null_frag, nxv2i16>; + defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, null_frag, nxv2i32>; + defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, null_frag, nxv2i32>; + defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv2i32>; + defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, null_frag, nxv2i32>; + defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm, nxv2i64>; + defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, null_frag, nxv2i64>; // Gathers using unscaled 64-bit offsets, e.g. 
// ld1h z0.d, p0/z, [x0, z0.d] - defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb">; - defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb">; - defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b">; - defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b">; - defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh">; - defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh">; - defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h">; - defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h">; - defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw">; - defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw">; - defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w">; - defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w">; - defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d">; - defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d">; + defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", null_frag, nxv2i8>; + defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", null_frag, nxv2i8>; + defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather, nxv2i8>; + defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", null_frag, nxv2i8>; + defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", null_frag, nxv2i16>; + defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", null_frag, nxv2i16>; + defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather, nxv2i16>; + defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", null_frag, nxv2i16>; + defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", null_frag, nxv2i32>; + defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", null_frag, nxv2i32>; + defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather, nxv2i32>; + defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", null_frag, nxv2i32>; + defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather, nxv2i64>; + defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", null_frag, nxv2i64>; // Gathers using scaled 64-bit offsets, e.g. 
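Across these gather hunks the pattern is the same: each addressing mode gets its own selection node (AArch64ld1_gather, _scaled, _sxtw/_uxtw, and _imm for vector base plus immediate), while null_frag leaves the first-faulting (ldff1*) and most sign-extending forms assembler-only for now. A hedged ACLE-level sketch of the three 64-bit element modes, same assumptions as above and with made-up function names:

    #include <arm_sve.h>

    // ld1d z0.d, p0/z, [x0, z0.d]         -- unscaled 64-bit offsets
    svuint64_t g_offset(svbool_t pg, const uint64_t *base, svuint64_t off) {
      return svld1_gather_u64offset_u64(pg, base, off);
    }

    // ld1d z0.d, p0/z, [x0, z0.d, lsl #3] -- scaled 64-bit indices
    svuint64_t g_index(svbool_t pg, const uint64_t *base, svuint64_t idx) {
      return svld1_gather_u64index_u64(pg, base, idx);
    }

    // ld1d z0.d, p0/z, [z0.d, #16]        -- vector base plus immediate
    svuint64_t g_imm(svbool_t pg, svuint64_t bases) {
      return svld1_gather_u64base_offset_u64(pg, bases, 16);
    }

The scaled 64-bit-offset and 32-bit-unpacked forms continue below.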
// ld1h z0.d, p0/z, [x0, z0.d, lsl #1] - defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", ZPR64ExtLSL16>; - defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", ZPR64ExtLSL16>; - defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", ZPR64ExtLSL16>; - defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", ZPR64ExtLSL16>; - defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", ZPR64ExtLSL32>; - defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", ZPR64ExtLSL32>; - defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", ZPR64ExtLSL32>; - defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", ZPR64ExtLSL32>; - defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", ZPR64ExtLSL64>; - defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", ZPR64ExtLSL64>; + defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", null_frag, ZPR64ExtLSL16, nxv2i16>; + defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", null_frag, ZPR64ExtLSL16, nxv2i16>; + defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled, ZPR64ExtLSL16, nxv2i16>; + defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", null_frag, ZPR64ExtLSL16, nxv2i16>; + defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", null_frag, ZPR64ExtLSL32, nxv2i32>; + defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", null_frag, ZPR64ExtLSL32, nxv2i32>; + defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled, ZPR64ExtLSL32, nxv2i32>; + defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", null_frag, ZPR64ExtLSL32, nxv2i32>; + defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled, ZPR64ExtLSL64, nxv2i64>; + defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", null_frag, ZPR64ExtLSL64, nxv2i64>; // Gathers using unscaled 32-bit offsets unpacked in 64-bits elements, e.g. 
// ld1h z0.d, p0/z, [x0, z0.d, uxtw] - defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; - defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; - defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; - defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; - defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; - defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; - defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; - defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; - defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; - defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; - defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; - defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; - defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; - defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", null_frag, null_frag, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; + defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", null_frag, null_frag, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; + defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; + defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", null_frag, null_frag, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; + defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; + defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; + defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; + defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; + defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; + defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; + defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; + defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; + defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; + defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; // Gathers using scaled 32-bit offsets unpacked in 64-bits elements, e.g. 
// ld1h z0.d, p0/z, [x0, z0.d, uxtw #1] - defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; - defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh",ZPR64ExtSXTW16, ZPR64ExtUXTW16>; - defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; - defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; - defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; - defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw",ZPR64ExtSXTW32, ZPR64ExtUXTW32>; - defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; - defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; - defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>; - defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>; + defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", null_frag, null_frag, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; + defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; + defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; + defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", null_frag, null_frag, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; + defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", null_frag, null_frag, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; + defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", null_frag, null_frag, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; + defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; + defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", null_frag, null_frag, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; + defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; + defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", null_frag, null_frag, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; // Non-temporal contiguous loads (register + immediate) defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>; @@ -751,15 +769,15 @@ let Predicates = [HasSVE] in { defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">; defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">; - defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">; - defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">; - defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">; - defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">; + defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>; + defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>; + defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo>; + defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels>; - defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">; - defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">; - defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">; - defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">; + defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", 
int_aarch64_sve_whilelt>; + defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele>; + defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo>; + defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels>; def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>; def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>; @@ -770,11 +788,11 @@ let Predicates = [HasSVE] in { def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">; def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">; - defm CNTB_XPiI : sve_int_count<0b000, "cntb">; - defm CNTH_XPiI : sve_int_count<0b010, "cnth">; - defm CNTW_XPiI : sve_int_count<0b100, "cntw">; - defm CNTD_XPiI : sve_int_count<0b110, "cntd">; - defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp">; + defm CNTB_XPiI : sve_int_count<0b000, "cntb", int_aarch64_sve_cntb>; + defm CNTH_XPiI : sve_int_count<0b010, "cnth", int_aarch64_sve_cnth>; + defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>; + defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>; + defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>; defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb">; defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb">; @@ -876,53 +894,53 @@ let Predicates = [HasSVE] in { defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr">; defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">; defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">; - defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd">; - - defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr">; - defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr">; - defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl">; - defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr">; - defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr">; - defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr">; - - defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr">; - defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">; - defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">; - - def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>; - def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>; - def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>; - def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>; - def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>; - def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>; - def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>; - def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>; - def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>; - def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>; - def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>; - def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>; - def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>; - def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>; - def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>; - def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>; - 
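With the while* comparisons and the cntb/cnth/cntw/cntd counters now backed by intrinsics, a fully predicated strip-mined loop can select directly to WHILELT and CNTW with no scalar tail. A minimal sketch under the same ACLE assumptions as the earlier examples:

    #include <arm_sve.h>

    // Doubles n ints without a scalar remainder loop: whilelt computes the
    // per-iteration predicate, cntw advances by the hardware vector length.
    void double_all(int32_t *x, int32_t n) {
      for (int32_t i = 0; i < n; i += (int32_t)svcntw()) {
        svbool_t pg = svwhilelt_b32_s32(i, n);      // whilelt p0.s, w?, w?
        svint32_t v = svld1_s32(pg, &x[i]);         // predicated ld1w
        svst1_s32(pg, &x[i], svmul_n_s32_x(pg, v, 2));
      }
    }

The fcvt/scvtf conversion rewrites continue below.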
def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>; - def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>; - def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>; - def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>; - def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>; - def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>; - def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>; - def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>; - def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>; - def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>; - def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>; - def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>; - def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>; - def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>; - def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>; - def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>; - def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>; - def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>; + defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", int_aarch64_sve_asrd>; + + defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", int_aarch64_sve_asr>; + defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", int_aarch64_sve_lsr>; + defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", int_aarch64_sve_lsl>; + defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", null_frag>; + defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", null_frag>; + defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", null_frag>; + + defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>; + defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>; + defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>; + + defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv16i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv16i1, nxv8f16, ElementSizeS>; + defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, int_aarch64_sve_scvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, int_aarch64_sve_fcvtzs, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, int_aarch64_sve_fcvtzs, nxv4i32, nxv4i1, nxv4f32, 
ElementSizeS>; + defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, int_aarch64_sve_fcvtzu, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, int_aarch64_sve_fcvtzu, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, nxv8f16, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, nxv2f64, nxv16i1, nxv8f16, ElementSizeD>; + defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, nxv4f32, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, nxv2f64, nxv16i1, nxv4f32, ElementSizeD>; + defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, nxv2f64, nxv16i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, nxv2f64, nxv16i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, nxv8f16, nxv16i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, nxv4f32, nxv16i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, nxv8f16, nxv16i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, nxv8f16, nxv16i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, nxv4f32, nxv16i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, nxv8f16, nxv16i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, nxv4i32, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, nxv4i32, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, nxv2i64, nxv16i1, nxv4f32, ElementSizeD>; + defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, nxv4i32, nxv16i1, nxv8f16, ElementSizeS>; + defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv16i1, nxv8f16, ElementSizeD>; + defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv16i1, nxv8f16, ElementSizeS>; + defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv16i1, nxv8f16, ElementSizeD>; + defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv16i1, nxv4f32, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoD : 
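Each sve_fp_2op_p_zd conversion above now names its intrinsic plus the exact result, predicate, and source vector types; note that the element-size-changing forms deliberately take the widest predicate type, nxv16i1. At the ACLE level, hedged as before with illustrative names:

    #include <arm_sve.h>

    // fcvt z0.s, p0/m, z0.h   (int_aarch64_sve_fcvt_f32f16)
    svfloat32_t widen(svbool_t pg, svfloat16_t v) {
      return svcvt_f32_f16_x(pg, v);
    }

    // fcvtzs z0.s, p0/m, z0.s (int_aarch64_sve_fcvtzs)
    svint32_t to_int(svbool_t pg, svfloat32_t v) {
      return svcvt_s32_f32_x(pg, v);
    }

The remaining fcvtzs/fcvtzu entries continue directly below.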
sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>; defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>; @@ -1419,32 +1437,32 @@ let Predicates = [HasSVE2] in { defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">; // SVE2 floating-point base 2 logarithm as integer - defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">; + defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>; // SVE2 floating-point convert precision - defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">; - defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">; - defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">; - def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>; + defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding_top<"fcvtxnt", "int_aarch64_sve_fcvtxnt">; + defm FCVTX_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtx", "int_aarch64_sve_fcvtx">; + defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt", "int_aarch64_sve_fcvtnt">; + defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt", "int_aarch64_sve_fcvtlt">; // SVE2 floating-point pairwise operations - defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">; - defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp">; - defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp">; - defm FMAXP_ZPmZZ : sve2_fp_pairwise_pred<0b110, "fmaxp">; - defm FMINP_ZPmZZ : sve2_fp_pairwise_pred<0b111, "fminp">; + defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp", int_aarch64_sve_faddp>; + defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp", int_aarch64_sve_fmaxnmp>; + defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp", int_aarch64_sve_fminnmp>; + defm FMAXP_ZPmZZ : sve2_fp_pairwise_pred<0b110, "fmaxp", int_aarch64_sve_fmaxp>; + defm FMINP_ZPmZZ : sve2_fp_pairwise_pred<0b111, "fminp", int_aarch64_sve_fminp>; // SVE2 floating-point multiply-add long (indexed) - def FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b00, "fmlalb">; - def FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b01, "fmlalt">; - def FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b10, "fmlslb">; - def FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b11, "fmlslt">; + defm FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b00, "fmlalb", int_aarch64_sve_fmlalb_lane>; + defm FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b01, "fmlalt", int_aarch64_sve_fmlalt_lane>; + defm FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b10, "fmlslb", int_aarch64_sve_fmlslb_lane>; + defm FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b11, "fmlslt", int_aarch64_sve_fmlslt_lane>; // SVE2 floating-point multiply-add long - def FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b00, "fmlalb">; - def FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b01, "fmlalt">; - def FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb">; - def FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt">; + defm FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b00, "fmlalb", int_aarch64_sve_fmlalb>; + defm FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b01, "fmlalt", int_aarch64_sve_fmlalt>; + defm FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb", int_aarch64_sve_fmlslb>; + defm FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt", int_aarch64_sve_fmlslt>; // SVE2 bitwise ternary operations defm 
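The widening multiply-add instructions become defms as well, with the indexed variants routed through the new SVE_4_Op_Imm_Pat so that the lane number is matched as an immediate. A sketch (these are SVE2 builtins, so -march=armv8-a+sve2; function names illustrative):

    #include <arm_sve.h>

    // fmlalb z0.s, z1.h, z2.h -- widening multiply-add of the even f16 lanes
    svfloat32_t mla_bottom(svfloat32_t acc, svfloat16_t a, svfloat16_t b) {
      return svmlalb_f32(acc, a, b);
    }

    // fmlalt z0.s, z1.h, z2.h[3] -- indexed form; the lane stays an immediate
    svfloat32_t mla_top_lane(svfloat32_t acc, svfloat16_t a, svfloat16_t b) {
      return svmlalt_lane_f32(acc, a, b, 3);
    }

The bitwise-ternary definitions continue below.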
EOR3_ZZZZ_D : sve2_int_bitwise_ternary_op<0b000, "eor3">; @@ -1493,15 +1511,16 @@ let Predicates = [HasSVE2] in { defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">; // SVE2 integer compare scalar count and limit - defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">; - defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">; - defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">; - defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi">; - - defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege">; - defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt">; - defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">; - defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">; + defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", null_frag>; + defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", null_frag>; + defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", null_frag>; + defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", null_frag>; + + defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", null_frag>; + defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", null_frag>; + defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", null_frag>; + defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", null_frag>; + // SVE2 pointer conflict compare defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 908d72dbfc3eb..ddbddb9607d75 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -156,6 +156,12 @@ int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, if (BitSize == 0) return TTI::TCC_Free; + // Most (all?) AArch64 intrinsics do not support folding immediates into the + // selected instruction, so we compute the materialization cost for the + // immediate directly. + if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv) + return AArch64TTIImpl::getIntImmCost(Imm, Ty); + switch (IID) { default: return TTI::TCC_Free; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 855510e7f5568..96a0117c9551a 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -304,8 +304,29 @@ class SVE_4_Op_Pat; +class SVE_3_Op_Imm_Pat +: Pat<(vtd (op vt1:$Op1, vt2:$Op2, (vt3 ImmTy:$Op3))), + (inst $Op1, $Op2, ImmTy:$Op3)>; + +class SVE_4_Op_Imm_Pat +: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))), + (inst $Op1, $Op2, $Op3, ImmTy:$Op4)>; + def SVEDup0Undef : ComplexPattern; +// +// Common but less generic patterns. 
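The SVE_3_Op_Imm_Pat and SVE_4_Op_Imm_Pat classes introduced just above match intrinsic calls whose trailing operand is a compile-time constant and keep it as an immediate on the selected instruction; ASRD is a typical user. A small sketch, same ACLE assumptions:

    #include <arm_sve.h>

    // asrd z0.s, p0/m, z0.s, #2 -- rounding shift, i.e. signed divide by 4;
    // SVE_3_Op_Imm_Pat keeps the #2 as an immediate rather than a register.
    svint32_t div4(svbool_t pg, svint32_t v) {
      return svasrd_n_s32_x(pg, v, 2);
    }

The remaining pattern-class additions continue below.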
+// + +class SVE_1_Op_AllActive_Pat +: Pat<(vtd (op vt1:$Op1)), + (inst (IMPLICIT_DEF), (ptrue 31), $Op1)>; + //===----------------------------------------------------------------------===// // SVE Predicate Misc Group //===----------------------------------------------------------------------===// @@ -483,11 +504,17 @@ class sve_int_pcount_pred sz8_64, bits<4> opc, string asm, let Inst{4-0} = Rd; } -multiclass sve_int_pcount_pred opc, string asm> { +multiclass sve_int_pcount_pred opc, string asm, + SDPatternOperator int_op> { def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>; def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>; def _S : sve_int_pcount_pred<0b10, opc, asm, PPR32>; def _D : sve_int_pcount_pred<0b11, opc, asm, PPR64>; + + def : SVE_2_Op_Pat(NAME # _B)>; + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -512,13 +539,16 @@ class sve_int_count opc, string asm> let Inst{4-0} = Rd; } -multiclass sve_int_count opc, string asm> { +multiclass sve_int_count opc, string asm, SDPatternOperator op> { def NAME : sve_int_count; def : InstAlias(NAME) GPR64:$Rd, sve_pred_enum:$pattern, 1), 1>; def : InstAlias(NAME) GPR64:$Rd, 0b11111, 1), 2>; + + def : Pat<(i64 (op sve_pred_enum:$pattern)), + (!cast(NAME) sve_pred_enum:$pattern, 1)>; } class sve_int_countvlv opc, string asm, ZPRRegOp zprty> @@ -888,14 +918,18 @@ class sve_int_perm_insrs sz8_64, string asm, ZPRRegOp zprty, let Constraints = "$Zdn = $_Zdn"; let DestructiveInstType = Destructive; - let ElementSize = ElementSizeNone; } -multiclass sve_int_perm_insrs { +multiclass sve_int_perm_insrs { def _B : sve_int_perm_insrs<0b00, asm, ZPR8, GPR32>; def _H : sve_int_perm_insrs<0b01, asm, ZPR16, GPR32>; def _S : sve_int_perm_insrs<0b10, asm, ZPR32, GPR32>; def _D : sve_int_perm_insrs<0b11, asm, ZPR64, GPR64>; + + def : SVE_2_Op_Pat(NAME # _B)>; + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; } class sve_int_perm_insrv sz8_64, string asm, ZPRRegOp zprty, @@ -914,14 +948,17 @@ class sve_int_perm_insrv sz8_64, string asm, ZPRRegOp zprty, let Constraints = "$Zdn = $_Zdn"; let DestructiveInstType = Destructive; - let ElementSize = ElementSizeNone; } -multiclass sve_int_perm_insrv { +multiclass sve_int_perm_insrv { def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8>; def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16>; def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32>; def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64>; + + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -1032,6 +1069,16 @@ class sve_int_pred_log opc, string asm> !strconcat(asm, "\t$Pd, $Pg/z, $Pn, $Pm")); let Defs = !if(!eq (opc{2}, 1), [NZCV], []); + +} + +multiclass sve_int_pred_log opc, string asm, SDPatternOperator op> { + def NAME : sve_int_pred_log; + + def : SVE_3_Op_Pat(NAME)>; + def : SVE_3_Op_Pat(NAME)>; + def : SVE_3_Op_Pat(NAME)>; + def : SVE_3_Op_Pat(NAME)>; } @@ -1633,18 +1680,26 @@ class sve2_fp_convert_precision opc, string asm, let Constraints = "$Zd = $_Zd"; } -multiclass sve2_fp_convert_down_narrow { +multiclass sve2_fp_convert_down_narrow { def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>; def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>; + + def : SVE_3_Op_Pat(op # _f16f32), nxv8f16, nxv16i1, 
nxv4f32, !cast(NAME # _StoH)>; + def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast(NAME # _DtoS)>; } -multiclass sve2_fp_convert_up_long { +multiclass sve2_fp_convert_up_long { def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>; def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>; + + def : SVE_3_Op_Pat(op # _f32f16), nxv4f32, nxv16i1, nxv8f16, !cast(NAME # _HtoS)>; + def : SVE_3_Op_Pat(op # _f64f32), nxv2f64, nxv16i1, nxv4f32, !cast(NAME # _StoD)>; } -multiclass sve2_fp_convert_down_odd_rounding { +multiclass sve2_fp_convert_down_odd_rounding_top { def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>; + + def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast(NAME # _DtoS)>; } //===----------------------------------------------------------------------===// @@ -1674,10 +1729,14 @@ class sve2_fp_pairwise_pred sz, bits<3> opc, string asm, let ElementSize = zprty.ElementSize; } -multiclass sve2_fp_pairwise_pred opc, string asm> { +multiclass sve2_fp_pairwise_pred opc, string asm, SDPatternOperator op> { def _H : sve2_fp_pairwise_pred<0b01, opc, asm, ZPR16>; def _S : sve2_fp_pairwise_pred<0b10, opc, asm, ZPR32>; def _D : sve2_fp_pairwise_pred<0b11, opc, asm, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -1686,7 +1745,7 @@ multiclass sve2_fp_pairwise_pred opc, string asm> { class sve2_fp_mla_long_by_indexed_elem opc, string asm> : I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, - VectorIndexH:$iop), + VectorIndexH32b:$iop), asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> { @@ -1710,6 +1769,12 @@ class sve2_fp_mla_long_by_indexed_elem opc, string asm> let ElementSize = ElementSizeNone; } +multiclass sve2_fp_mla_long_by_indexed_elem opc, string asm, + SDPatternOperator op> { + def NAME : sve2_fp_mla_long_by_indexed_elem; + def : SVE_4_Op_Imm_Pat(NAME)>; +} + //===----------------------------------------------------------------------===// // SVE2 Floating Point Widening Multiply-Add Group //===----------------------------------------------------------------------===// @@ -1736,6 +1801,11 @@ class sve2_fp_mla_long opc, string asm> let ElementSize = ElementSizeNone; } +multiclass sve2_fp_mla_long opc, string asm, SDPatternOperator op> { + def NAME : sve2_fp_mla_long; + def : SVE_3_Op_Pat(NAME)>; +} + //===----------------------------------------------------------------------===// // SVE Stack Allocation Group //===----------------------------------------------------------------------===// @@ -1830,6 +1900,16 @@ class sve_fp_2op_p_zd opc, string asm, RegisterOperand i_zprtype, let ElementSize = size; } +multiclass sve_fp_2op_p_zd opc, string asm, + RegisterOperand i_zprtype, + RegisterOperand o_zprtype, + SDPatternOperator op, ValueType vt1, + ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { + def NAME : sve_fp_2op_p_zd; + + def : SVE_3_Op_Pat(NAME)>; +} + multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op> { def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; @@ -1840,10 +1920,19 @@ multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op> { def : SVE_3_Op_Pat(NAME # _D)>; } -multiclass sve2_fp_flogb { +multiclass sve2_fp_flogb { def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>; def _S : sve_fp_2op_p_zd<0b0011100, asm, 
ZPR32, ZPR32, ElementSizeS>; def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>; + + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; +} + +multiclass sve2_fp_convert_down_odd_rounding { + def _DtoS : sve_fp_2op_p_zd<0b0001010, asm, ZPR64, ZPR32, ElementSizeD>; + def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast(NAME # _DtoS)>; } //===----------------------------------------------------------------------===// @@ -3560,7 +3649,8 @@ class sve_int_cterm } class sve_int_while_rr sz8_64, bits<4> opc, string asm, - RegisterClass gprty, PPRRegOp pprty> + RegisterClass gprty, PPRRegOp pprty, + ValueType vt, SDPatternOperator op> : I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm), asm, "\t$Pd, $Rn, $Rm", "", []>, Sched<[]> { @@ -3580,18 +3670,28 @@ class sve_int_while_rr sz8_64, bits<4> opc, string asm, let Defs = [NZCV]; } -multiclass sve_int_while4_rr opc, string asm> { - def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>; - def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>; - def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>; - def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>; +multiclass sve_int_while4_rr opc, string asm, SDPatternOperator op> { + def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8, nxv16i1, op>; + def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16, nxv8i1, op>; + def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32, nxv4i1, op>; + def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64, nxv2i1, op>; + + def : SVE_2_Op_Pat(NAME # _B)>; + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; } -multiclass sve_int_while8_rr opc, string asm> { - def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>; - def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>; - def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>; - def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>; +multiclass sve_int_while8_rr opc, string asm, SDPatternOperator op> { + def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8, nxv16i1, op>; + def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16, nxv8i1, op>; + def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32, nxv4i1, op>; + def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64, nxv2i1, op>; + + def : SVE_2_Op_Pat(NAME # _B)>; + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; } class sve2_int_while_rr sz8_64, bits<1> rw, string asm, @@ -3906,7 +4006,8 @@ multiclass sve_int_bin_pred_shift_imm_left opc, string asm> { } } -multiclass sve_int_bin_pred_shift_imm_right opc, string asm> { +multiclass sve_int_bin_pred_shift_imm_right opc, string asm, + SDPatternOperator op = null_frag> { def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8, ElementSizeB>; def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16, @@ -3922,6 +4023,11 @@ multiclass sve_int_bin_pred_shift_imm_right opc, string asm> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } + + def : SVE_3_Op_Imm_Pat(NAME # _B)>; + def : SVE_3_Op_Imm_Pat(NAME # _H)>; + def : SVE_3_Op_Imm_Pat(NAME # _S)>; + def : SVE_3_Op_Imm_Pat(NAME # _D)>; } class sve_int_bin_pred_shift sz8_64, bit wide, bits<3> opc, @@ -3948,17 +4054,28 @@ class sve_int_bin_pred_shift sz8_64, bit wide, bits<3> opc, let ElementSize = zprty.ElementSize; } -multiclass 
sve_int_bin_pred_shift opc, string asm> { +multiclass sve_int_bin_pred_shift opc, string asm, + SDPatternOperator op> { def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>; def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>; def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>; def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _B)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } -multiclass sve_int_bin_pred_shift_wide opc, string asm> { +multiclass sve_int_bin_pred_shift_wide opc, string asm, + SDPatternOperator op> { def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>; def _H : sve_int_bin_pred_shift<0b01, 0b1, opc, asm, ZPR16, ZPR64>; def _S : sve_int_bin_pred_shift<0b10, 0b1, opc, asm, ZPR32, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _B)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; } //===----------------------------------------------------------------------===// @@ -4759,26 +4876,46 @@ class sve_int_perm_rev sz8_64, bits<2> opc, string asm, let ElementSize = zprty.ElementSize; } -multiclass sve_int_perm_rev_rbit { +multiclass sve_int_perm_rev_rbit { def _B : sve_int_perm_rev<0b00, 0b11, asm, ZPR8>; def _H : sve_int_perm_rev<0b01, 0b11, asm, ZPR16>; def _S : sve_int_perm_rev<0b10, 0b11, asm, ZPR32>; def _D : sve_int_perm_rev<0b11, 0b11, asm, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _B)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } -multiclass sve_int_perm_rev_revb { +multiclass sve_int_perm_rev_revb { def _H : sve_int_perm_rev<0b01, 0b00, asm, ZPR16>; def _S : sve_int_perm_rev<0b10, 0b00, asm, ZPR32>; def _D : sve_int_perm_rev<0b11, 0b00, asm, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; + + def : SVE_1_Op_AllActive_Pat(NAME # _H), PTRUE_H>; + def : SVE_1_Op_AllActive_Pat(NAME # _S), PTRUE_S>; + def : SVE_1_Op_AllActive_Pat(NAME # _D), PTRUE_D>; } -multiclass sve_int_perm_rev_revh { +multiclass sve_int_perm_rev_revh { def _S : sve_int_perm_rev<0b10, 0b01, asm, ZPR32>; def _D : sve_int_perm_rev<0b11, 0b01, asm, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } -multiclass sve_int_perm_rev_revw { +multiclass sve_int_perm_rev_revw { def _D : sve_int_perm_rev<0b11, 0b10, asm, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _D)>; } class sve_int_perm_cpy_r sz8_64, string asm, ZPRRegOp zprty, @@ -5215,8 +5352,11 @@ class sve_mem_32b_gld_sv opc, bit xs, bit scaled, string asm, } multiclass sve_mem_32b_gld_sv_32_scaled opc, string asm, + SDPatternOperator sxtw_op, + SDPatternOperator uxtw_op, RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd> { + RegisterOperand uxtw_opnd, + ValueType vt> { def _UXTW_SCALED_REAL : sve_mem_32b_gld_sv; def _SXTW_SCALED_REAL : sve_mem_32b_gld_sv; @@ -5224,11 +5364,19 @@ multiclass sve_mem_32b_gld_sv_32_scaled opc, string asm, (!cast(NAME # _UXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; def : InstAlias(NAME # _SXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; + + def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)), + (!cast(NAME # _UXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>; + def : Pat<(nxv4i32 (sxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)), + (!cast(NAME # _SXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, 
ZPR:$indices)>; } multiclass sve_mem_32b_gld_vs_32_unscaled opc, string asm, + SDPatternOperator sxtw_op, + SDPatternOperator uxtw_op, RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd> { + RegisterOperand uxtw_opnd, + ValueType vt> { def _UXTW_REAL : sve_mem_32b_gld_sv; def _SXTW_REAL : sve_mem_32b_gld_sv; @@ -5236,6 +5384,11 @@ multiclass sve_mem_32b_gld_vs_32_unscaled opc, string asm, (!cast(NAME # _UXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; def : InstAlias(NAME # _SXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; + + def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)), + (!cast(NAME # _UXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; + def : Pat<(nxv4i32 (sxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)), + (!cast(NAME # _SXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; } @@ -5263,7 +5416,8 @@ class sve_mem_32b_gld_vi opc, string asm, Operand imm_ty> let Uses = !if(!eq(opc{0}, 1), [FFR], []); } -multiclass sve_mem_32b_gld_vi_32_ptrs opc, string asm, Operand imm_ty> { +multiclass sve_mem_32b_gld_vi_32_ptrs opc, string asm, Operand imm_ty, + SDPatternOperator op, ValueType vt> { def _IMM_REAL : sve_mem_32b_gld_vi; def : InstAlias opc, string asm, Operand imm_ty> { (!cast(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>; def : InstAlias(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>; + + def : Pat<(nxv4i32 (op (nxv4i1 PPR:$gp), (nxv4i32 ZPR:$ptrs), imm_ty:$index, vt)), + (!cast(NAME # _IMM_REAL) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>; } class sve_mem_prfm_si msz, string asm> @@ -5507,8 +5664,11 @@ class sve_mem_64b_gld_sv opc, bit xs, bit scaled, bit lsl, string asm, } multiclass sve_mem_64b_gld_sv_32_scaled opc, string asm, + SDPatternOperator sxtw_op, + SDPatternOperator uxtw_op, RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd> { + RegisterOperand uxtw_opnd, + ValueType vt> { def _UXTW_SCALED_REAL : sve_mem_64b_gld_sv; def _SXTW_SCALED_REAL : sve_mem_64b_gld_sv; @@ -5516,11 +5676,19 @@ multiclass sve_mem_64b_gld_sv_32_scaled opc, string asm, (!cast(NAME # _UXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; def : InstAlias(NAME # _SXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; + + def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)), + (!cast(NAME # _UXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>; + def : Pat<(nxv2i64 (sxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)), + (!cast(NAME # _SXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>; } multiclass sve_mem_64b_gld_vs_32_unscaled opc, string asm, + SDPatternOperator sxtw_op, + SDPatternOperator uxtw_op, RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd> { + RegisterOperand uxtw_opnd, + ValueType vt> { def _UXTW_REAL : sve_mem_64b_gld_sv; def _SXTW_REAL : sve_mem_64b_gld_sv; @@ -5528,21 +5696,34 @@ multiclass sve_mem_64b_gld_vs_32_unscaled opc, string asm, (!cast(NAME # _UXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; def : InstAlias(NAME # _SXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; + + def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)), + (!cast(NAME # _UXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; + def : Pat<(nxv2i64 (sxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)), + (!cast(NAME # _SXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; } multiclass 
sve_mem_64b_gld_sv2_64_scaled opc, string asm, - RegisterOperand zprext> { + SDPatternOperator op, + RegisterOperand zprext, ValueType vt> { def _SCALED_REAL : sve_mem_64b_gld_sv; def : InstAlias(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>; + + def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)), + (!cast(NAME # _SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>; } -multiclass sve_mem_64b_gld_vs2_64_unscaled opc, string asm> { +multiclass sve_mem_64b_gld_vs2_64_unscaled opc, string asm, + SDPatternOperator op, ValueType vt> { def _REAL : sve_mem_64b_gld_sv; def : InstAlias(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>; + + def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)), + (!cast(NAME # _REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>; } class sve_mem_64b_gld_vi opc, string asm, Operand imm_ty> @@ -5569,7 +5750,8 @@ class sve_mem_64b_gld_vi opc, string asm, Operand imm_ty> let Uses = !if(!eq(opc{0}, 1), [FFR], []); } -multiclass sve_mem_64b_gld_vi_64_ptrs opc, string asm, Operand imm_ty> { +multiclass sve_mem_64b_gld_vi_64_ptrs opc, string asm, Operand imm_ty, + SDPatternOperator op, ValueType vt> { def _IMM_REAL : sve_mem_64b_gld_vi; def : InstAlias opc, string asm, Operand imm_ty> { (!cast(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>; def : InstAlias(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>; + + def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt)), + (!cast(NAME # _IMM_REAL) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>; } // bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl) diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 7a4fcac09ec4d..57c126fe6494b 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -643,6 +643,17 @@ namespace AArch64II { }; } // end namespace AArch64II +namespace AArch64 { +// The number of bits in a SVE register is architecturally defined +// to be a multiple of this value. If <M x t> has this number of bits, +// a <n x M x t> vector can be stored in a SVE register without any +// redundant bits. If <M x t> has this number of bits divided by P, +// a <n x M x t> vector is stored in a SVE register by placing index i +// in index i*P of a <n x (M*P) x t> vector. The other elements of the +// <n x (M*P) x t> vector (such as index 1) are undefined.
+static constexpr unsigned SVEBitsPerBlock = 128; +} // end namespace AArch64 + } // end namespace llvm #endif diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def index 85d1ad3491573..ae87cf08275f0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def @@ -32,7 +32,13 @@ enum PartialMappingIdx { PM_VGPR512 = 22, PM_VGPR1024 = 23, PM_SGPR96 = 24, - PM_VGPR96 = 25 + PM_VGPR96 = 25, + PM_AGPR96 = 26, + PM_AGPR32 = 32, + PM_AGPR64 = 33, + PM_AGPR128 = 34, + PM_AGPR512 = 36, + PM_AGPR1024 = 37 }; const RegisterBankInfo::PartialMapping PartMappings[] { @@ -58,7 +64,14 @@ const RegisterBankInfo::PartialMapping PartMappings[] { {0, 512, VGPRRegBank}, {0, 1024, VGPRRegBank}, {0, 96, SGPRRegBank}, - {0, 96, VGPRRegBank} + {0, 96, VGPRRegBank}, + {0, 96, AGPRRegBank}, + + {0, 32, AGPRRegBank}, // AGPR begin + {0, 64, AGPRRegBank}, + {0, 128, AGPRRegBank}, + {0, 512, AGPRRegBank}, + {0, 1024, AGPRRegBank} }; const RegisterBankInfo::ValueMapping ValMappings[] { @@ -94,7 +107,21 @@ const RegisterBankInfo::ValueMapping ValMappings[] { {&PartMappings[16], 1}, // 512 {&PartMappings[17], 1}, // 1024 {&PartMappings[18], 1}, - {&PartMappings[19], 1} + {&PartMappings[19], 1}, + {&PartMappings[20], 1}, + + // AGPRs + {nullptr, 0}, + {nullptr, 0}, + {nullptr, 0}, + {nullptr, 0}, + {nullptr, 0}, + {&PartMappings[21], 1}, // 32 + {&PartMappings[22], 1}, // 64 + {&PartMappings[23], 1}, // 128 + {nullptr, 0}, + {&PartMappings[24], 1}, // 512 + {&PartMappings[25], 1} // 1024 }; const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] { @@ -122,7 +149,8 @@ const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] { enum ValueMappingIdx { SCCStartIdx = 0, SGPRStartIdx = 2, - VGPRStartIdx = 13 + VGPRStartIdx = 13, + AGPRStartIdx = 27 }; const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID, @@ -139,12 +167,32 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID, Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR1 : PM_VGPR1; break; case 96: - assert(BankID != AMDGPU::VCCRegBankID); - Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR96 : PM_VGPR96; + switch (BankID) { + case AMDGPU::VGPRRegBankID: + Idx = PM_VGPR96; + break; + case AMDGPU::SGPRRegBankID: + Idx = PM_SGPR96; + break; + case AMDGPU::AGPRRegBankID: + Idx = PM_AGPR96; + break; + default: llvm_unreachable("Invalid register bank"); + } break; default: - assert(BankID != AMDGPU::VCCRegBankID); - Idx = BankID == AMDGPU::VGPRRegBankID ? 
VGPRStartIdx : SGPRStartIdx; + switch (BankID) { + case AMDGPU::VGPRRegBankID: + Idx = VGPRStartIdx; + break; + case AMDGPU::SGPRRegBankID: + Idx = SGPRStartIdx; + break; + case AMDGPU::AGPRRegBankID: + Idx = AGPRStartIdx; + break; + default: llvm_unreachable("Invalid register bank"); + } Idx += Log2_32_Ceil(Size); break; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 8dae8b6c932ef..a51d3d74c899f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -106,6 +106,14 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST) (void)RBVGPR; assert(&RBVGPR == &AMDGPU::VGPRRegBank); + const RegisterBank &RBAGPR = getRegBank(AMDGPU::AGPRRegBankID); + (void)RBAGPR; + assert(&RBAGPR == &AMDGPU::AGPRRegBank); +} + +static bool isVectorRegisterBank(const RegisterBank &Bank) { + unsigned BankID = Bank.getID(); + return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID; } unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, @@ -113,7 +121,7 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, unsigned Size) const { // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane? if (Dst.getID() == AMDGPU::SGPRRegBankID && - Src.getID() == AMDGPU::VGPRRegBankID) { + isVectorRegisterBank(Src)) { return std::numeric_limits::max(); } @@ -127,8 +135,8 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, if (Size == 1 && (Dst.getID() == AMDGPU::SCCRegBankID || Dst.getID() == AMDGPU::SGPRRegBankID) && - (Src.getID() == AMDGPU::SGPRRegBankID || - Src.getID() == AMDGPU::VGPRRegBankID || + (isVectorRegisterBank(Src) || + Src.getID() == AMDGPU::SGPRRegBankID || Src.getID() == AMDGPU::VCCRegBankID)) return std::numeric_limits::max(); @@ -136,6 +144,11 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, Src.getID() == AMDGPU::VCCRegBankID) return std::numeric_limits::max(); + // There is no direct copy between AGPRs. + if (Dst.getID() == AMDGPU::AGPRRegBankID && + Src.getID() == AMDGPU::AGPRRegBankID) + return 4; + return RegisterBankInfo::copyCost(Dst, Src, Size); } @@ -169,7 +182,12 @@ const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass( if (&RC == &AMDGPU::SReg_1RegClass) return AMDGPU::VCCRegBank; - return TRI->isSGPRClass(&RC) ? AMDGPU::SGPRRegBank : AMDGPU::VGPRRegBank; + if (TRI->isSGPRClass(&RC)) + return AMDGPU::SGPRRegBank; + if (TRI->isAGPRClass(&RC)) + return AMDGPU::AGPRRegBank; + + return AMDGPU::VGPRRegBank; } template @@ -1908,7 +1926,7 @@ bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const { continue; Register Reg = MI.getOperand(i).getReg(); if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) { - if (Bank->getID() == AMDGPU::VGPRRegBankID) + if (isVectorRegisterBank(*Bank)) return false; assert(Bank->getID() == AMDGPU::SGPRRegBankID || @@ -2072,7 +2090,6 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, unsigned Default) const { - const RegisterBank *Bank = getRegBank(Reg, MRI, TRI); return Bank ? 
Bank->getID() : Default;
 }
 
@@ -2102,6 +2119,14 @@ AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
   return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
 }
 
+const RegisterBankInfo::ValueMapping *
+AMDGPURegisterBankInfo::getAGPROpMapping(Register Reg,
+                                         const MachineRegisterInfo &MRI,
+                                         const TargetRegisterInfo &TRI) const {
+  unsigned Size = getSizeInBits(Reg, MRI, TRI);
+  return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID, Size);
+}
+
 ///
 /// This function must return a legal mapping, because
 /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
@@ -2725,6 +2750,38 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
       break;
     }
+    case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
+    case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
+    case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
+    case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
+    case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
+    case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
+    case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
+    case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
+    case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
+    case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
+    case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
+    case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
+    case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
+    case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
+    case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
+    case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
+    case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
+    case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
+    case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
+    case Intrinsic::amdgcn_mfma_f32_32x32x4bf16: {
+      // Default for MAI intrinsics.
+      // srcC can also be an immediate which can be folded later.
+      // FIXME: Should we eventually add an alternative mapping with AGPR src
+      // for srcA/srcB?
+      //
+      // vdst, srcA, srcB, srcC
+      OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+      OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+      OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+      OpdsMapping[4] = getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+      break;
+    }
     }
     break;
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index a14b74961118a..9549e444ade54 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -103,6 +103,11 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
                                       const MachineRegisterInfo &MRI,
                                       const TargetRegisterInfo &TRI) const;
 
+  // Return a value mapping for an operand that is required to be an AGPR.
+  const ValueMapping *getAGPROpMapping(Register Reg,
+                                       const MachineRegisterInfo &MRI,
+                                       const TargetRegisterInfo &TRI) const;
+
  /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p
  /// Regs. This appropriately sets the regbank of the new registers.
void split64BitValueForMapping(MachineIRBuilder &B, @@ -131,6 +136,7 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const MachineInstr &MI, const MachineRegisterInfo &MRI) const; bool isSALUMapping(const MachineInstr &MI) const; + const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingAllVGPR( diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td index 00f53b1575770..ab3b176ac2147 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td @@ -18,3 +18,7 @@ def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>; // It is helpful to distinguish conditions from ordinary SGPRs. def VCCRegBank : RegisterBank <"VCC", [SReg_1]>; + +def AGPRRegBank : RegisterBank <"AGPR", + [AGPR_32, AReg_64, AReg_128, AReg_512, AReg_1024] +>; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 4ae981581027f..9388592c88734 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -304,15 +304,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address); if (Res) { IsSDWA = true; break; } - // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and - // v_mad_mixhi_f16 for FMA variants. Try to decode using this special - // table first so we print the correct name. - - if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) { - Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address); - if (Res) break; - } - if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) { Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address); if (Res) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index ca17ba8b7229c..f2c00ddce94c3 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -429,6 +429,29 @@ static bool tryAddToFoldList(SmallVectorImpl &FoldList, return true; } + // Check the case where we might introduce a second constant operand to a + // scalar instruction + if (TII->isSALU(MI->getOpcode())) { + const MCInstrDesc &InstDesc = MI->getDesc(); + const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo]; + const SIRegisterInfo &SRI = TII->getRegisterInfo(); + + // Fine if the operand can be encoded as an inline constant + if (OpToFold->isImm()) { + if (!SRI.opCanUseInlineConstant(OpInfo.OperandType) || + !TII->isInlineConstant(*OpToFold, OpInfo)) { + // Otherwise check for another constant + for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) { + auto &Op = MI->getOperand(i); + if (OpNo != i && + TII->isLiteralConstantLike(Op, OpInfo)) { + return false; + } + } + } + } + } + appendFoldCandidate(FoldList, MI, OpNo, OpToFold); return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ed915f03be217..5e39e7c119bc4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6211,7 +6211,11 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, if (ST.hasAddNoCarry()) return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg); - Register UnusedCarry = 
RS.scavengeRegister(RI.getBoolRC(), I, 0, false); + // If available, prefer to use vcc. + Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC) + ? Register(RI.getVCC()) + : RS.scavengeRegister(RI.getBoolRC(), I, 0, false); + // TODO: Users need to deal with this. if (!UnusedCarry.isValid()) return MachineInstrBuilder(); diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 6f9abd3a8d9b9..bf052dc3c9304 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -372,12 +372,15 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) { // exit" mask. MachineInstr *And = nullptr, *Or = nullptr; if (!SkipAnding) { - And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), Dst) + Register AndReg = MRI->createVirtualRegister(BoolRC); + And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg) .addReg(Exec) .add(MI.getOperand(1)); Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst) - .addReg(Dst) + .addReg(AndReg) .add(MI.getOperand(2)); + if (LIS) + LIS->createAndComputeVirtRegInterval(AndReg); } else Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst) .add(MI.getOperand(1)) diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 9b3b2436475ce..05c81feb23ecd 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -26,6 +26,7 @@ #include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -73,8 +74,8 @@ class SIPeepholeSDWA : public MachineFunctionPass { const SIRegisterInfo *TRI; const SIInstrInfo *TII; - std::unordered_map> SDWAOperands; - std::unordered_map PotentialMatches; + MapVector> SDWAOperands; + MapVector PotentialMatches; SmallVector ConvertedInstructions; Optional foldToImm(const MachineOperand &Op) const; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index efcc7266316e0..5796c6e6a112c 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1123,11 +1123,15 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (!IsVOP2) MIB.addImm(0); // clamp bit } else { - Register ConstOffsetReg = - RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MIB, 0, false); + assert(MIB->getOpcode() == AMDGPU::V_ADD_I32_e64 && + "Need to reuse carry out register"); - // This should always be able to use the unused carry out. - assert(ConstOffsetReg && "this scavenge should not be able to fail"); + // Use scavenged unused carry out as offset register. + Register ConstOffsetReg; + if (!isWave32) + ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0); + else + ConstOffsetReg = MIB.getReg(1); BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) .addImm(Offset); @@ -1136,10 +1140,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MIB.addImm(0); // clamp bit } } else { - // We have to produce a carry out, and we there isn't a free SGPR - // pair for it. We can keep the whole computation on the SALU to - // avoid clobbering an additional register at the cost of an extra - // mov. + // We have to produce a carry out, and there isn't a free SGPR pair + // for it. We can keep the whole computation on the SALU to avoid + // clobbering an additional register at the cost of an extra mov. 
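// (Illustrative sketch only, not the exact emitted order: the all-SALU
// expansion below materializes the scaled frame offset with
// S_MOV_B32/S_SUB_U32/S_LSHL_B32, instead of a V_ADD_I32_e64 whose carry
// out would require a free SGPR pair.)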
// We may have 1 free scratch SGPR even though a carry out is // unavailable. Only one additional mov is needed. @@ -1161,9 +1164,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg) .addReg(ScaledReg, RegState::Kill) .addImm(Offset); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg) - .addReg(DiffReg, RegState::Kill) - .addImm(ST.getWavefrontSizeLog2()); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg) + .addReg(DiffReg, RegState::Kill) + .addImm(ST.getWavefrontSizeLog2()); } } } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index ac3dea1a1a281..ac8c56fa3a038 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -144,6 +144,11 @@ class SIRegisterInfo final : public AMDGPURegisterInfo { return isSGPRClass(RC); } + /// \returns true if this class contains only AGPR registers + bool isAGPRClass(const TargetRegisterClass *RC) const { + return hasAGPRs(RC) && !hasVGPRs(RC); + } + /// \returns true if this class contains VGPR registers. bool hasVGPRs(const TargetRegisterClass *RC) const; diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h index 2e6f756d522c8..9076c191d8397 100644 --- a/llvm/lib/Target/ARM/ARM.h +++ b/llvm/lib/Target/ARM/ARM.h @@ -43,7 +43,6 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); -FunctionPass *createARMCodeGenPreparePass(); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); @@ -61,7 +60,6 @@ void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, void initializeARMParallelDSPPass(PassRegistry &); void initializeARMLoadStoreOptPass(PassRegistry &); void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); -void initializeARMCodeGenPreparePass(PassRegistry &); void initializeARMConstantIslandsPass(PassRegistry &); void initializeARMExpandPseudoPass(PassRegistry &); void initializeThumb2SizeReducePass(PassRegistry &); diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 285dad1cf29a3..66bfd4c82e25c 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -391,11 +391,9 @@ def FeatureExecuteOnly : SubtargetFeature<"execute-only", "Enable the generation of " "execute only code.">; -foreach i = {6-11} in - def FeatureReserveR#i : SubtargetFeature<"reserve-r"#i, - "ReservedGPRegisters["#i#"]", "true", - "Reserve R"#i#", making it " - "unavailable as a GPR">; +def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", + "Reserve R9, making it unavailable" + " as GPR">; def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", "Don't use movt/movw pairs for " diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 10153dd2e3950..ed0969fa625b0 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -752,7 +752,7 @@ void ARMAsmPrinter::emitAttributes() { if (STI.isRWPI()) ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsSB); - else if (STI.isGPRegisterReserved(9)) + else if (STI.isR9Reserved()) ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9Reserved); else diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp 
b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index ef10c9f738ef8..cecc16ffccba8 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2723,25 +2723,6 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, return false; } -/// getSwappedCondition - assume the flags are set by MI(a,b), return -/// the condition code if we modify the instructions such that flags are -/// set by MI(b,a). -inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { - switch (CC) { - default: return ARMCC::AL; - case ARMCC::EQ: return ARMCC::EQ; - case ARMCC::NE: return ARMCC::NE; - case ARMCC::HS: return ARMCC::LS; - case ARMCC::LO: return ARMCC::HI; - case ARMCC::HI: return ARMCC::LO; - case ARMCC::LS: return ARMCC::HS; - case ARMCC::GE: return ARMCC::LE; - case ARMCC::LT: return ARMCC::GT; - case ARMCC::GT: return ARMCC::LT; - case ARMCC::LE: return ARMCC::GE; - } -} - /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return /// the condition code if we modify the instructions such that flags are /// set by ADD(a,b,X). diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index afcdb648cbc8f..4ace52b32e9ff 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -198,11 +198,9 @@ getReservedRegs(const MachineFunction &MF) const { markSuperRegs(Reserved, getFramePointerReg(STI)); if (hasBasePointer(MF)) markSuperRegs(Reserved, BasePtr); - for (size_t R = 0; R < ARM::GPRRegClass.getNumRegs(); ++R) { - if (STI.isGPRegisterReserved(R)) { - markSuperRegs(Reserved, ARM::R0 + R); - } - } + // Some targets reserve R9. + if (STI.isR9Reserved()) + markSuperRegs(Reserved, ARM::R9); // Reserve D16-D31 if the subtarget doesn't support them. if (!STI.hasD32()) { static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!"); @@ -282,7 +280,7 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case ARM::GPRRegClassID: { bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed() ? TFI->hasFP(MF) : true; - return 10 - HasFP - STI.getNumGPRegistersReserved(); + return 10 - HasFP - (STI.isR9Reserved() ? 1 : 0); } case ARM::SPRRegClassID: // Currently not used as 'rep' register class. case ARM::DPRRegClassID: @@ -382,11 +380,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo(); const ARMFrameLowering *TFI = getFrameLowering(MF); - const ARMSubtarget &STI = MF.getSubtarget(); - - // Disable base pointer R6 if -ffixed-r6 is used. - if (STI.isGPRegisterReserved(BasePtr - ARM::R0)) - return false; // If we have stack realignment and VLAs, we have no pointer to use to // access the stack. If we have stack realignment, and a large call frame, @@ -423,7 +416,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); const ARMFrameLowering *TFI = getFrameLowering(MF); - const ARMSubtarget &STI = MF.getSubtarget(); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, // 2. There are VLAs in the function and the base pointer is disabled. @@ -433,9 +425,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // register allocation with frame pointer elimination, it is too late now. 
if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget()))) return false; - // Disable base pointer R6 if -ffixed-r6 is used. - if (STI.isGPRegisterReserved(BasePtr - ARM::R0)) - return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. if (TFI->hasReservedCallFrame(MF)) diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 24ca25f73e96d..634fb89b8e893 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1917,6 +1917,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() { MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(ARM::t2LE)); + // Swapped a t2Bcc for a t2LE, so no need to update the size of the block. MIB.add(Br.MI->getOperand(0)); Br.MI->eraseFromParent(); Br.MI = MIB; @@ -1975,21 +1976,20 @@ bool ARMConstantIslands::optimizeThumb2Branches() { .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); Cmp.MI->eraseFromParent(); - BBInfoVector &BBInfo = BBUtils->getBBInfo(); - BBInfo[MBB->getNumber()].Size -= 2; if (Br.MI->getOpcode() == ARM::tBcc) { Br.MI->eraseFromParent(); Br.MI = NewBR; - } else if (&MBB->back() != Br.MI) { - // We've generated an LE and already erased the original conditional - // branch. The CBN?Z is now used to branch to the other successor, so an - // unconditional branch terminator is now redundant. + BBUtils->adjustBBSize(MBB, -2); + } else if (MBB->back().getOpcode() != ARM::t2LE) { + // An LE has been generated, but it's not the terminator - that is an + // unconditional branch. However, the logic has now been reversed with the + // CBN?Z being the conditional branch and the LE being the unconditional + // branch. So this means we can remove the redundant unconditional branch + // at the end of the block. MachineInstr *LastMI = &MBB->back(); - if (LastMI != Br.MI) { - BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize(); - LastMI->eraseFromParent(); - } + BBUtils->adjustBBSize(MBB, -LastMI->getDesc().getSize()); + LastMI->eraseFromParent(); } BBUtils->adjustBBOffsetsAfter(MBB); ++NumCBZ; diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 563fdda561049..de4377ec5a471 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1213,9 +1213,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MBBI = NewMI; return true; } + case ARM::VMOVHcc: case ARM::VMOVScc: case ARM::VMOVDcc: { - unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD; + unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), MI.getOperand(1).getReg()) .add(MI.getOperand(2)) diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 106894e28f033..5428bd6c94b35 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1704,19 +1704,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (STI.isRWPI() && Reg == ARM::R9) { - // Paranoid check for use of R9 with RWPI. 
Clobbering R9 with -frwpi will - // emit warnings about undefined behaviour but maybe theres's a valid use - // case so on that basis allow it to be pushed/popped in the - // prologue/epilogue. - } else if (Reg > ARM::R0 && ARM::GPRRegClass.contains(Reg) && - STI.isGPRegisterReserved(Reg - ARM::R0)) { - LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " has been reserved and" - << " should not be allocatable" - << " or spillable.\n"); - SavedRegs.reset(Reg); - continue; - } bool Spilled = false; if (SavedRegs.test(Reg)) { Spilled = true; @@ -1961,7 +1948,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is saved low register, RegDeficit = " << RegDeficit << "\n"); - } else if (!STI.isGPRegisterReserved(Reg - ARM::R0)) { + } else { AvailableRegs.push_back(Reg); LLVM_DEBUG( dbgs() @@ -1976,7 +1963,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, --RegDeficit; LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = " << RegDeficit << "\n"); - } else if (!STI.isGPRegisterReserved(7)) { + } else { AvailableRegs.push_back(ARM::R7); LLVM_DEBUG( dbgs() diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 46a2560e16745..a6b334938e179 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1351,11 +1351,27 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, unsigned Shift) { unsigned Opcode = Op->getOpcode(); - ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) - ? cast(Op)->getAddressingMode() - : cast(Op)->getAddressingMode(); + ISD::MemIndexedMode AM; + switch (Opcode) { + case ISD::LOAD: + AM = cast(Op)->getAddressingMode(); + break; + case ISD::STORE: + AM = cast(Op)->getAddressingMode(); + break; + case ISD::MLOAD: + AM = cast(Op)->getAddressingMode(); + break; + case ISD::MSTORE: + AM = cast(Op)->getAddressingMode(); + break; + default: + llvm_unreachable("Unexpected Opcode for Imm7Offset"); + } + int RHSC; - if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits. + // 7 bit constant, shifted by Shift. + if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) ? 
CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) @@ -1625,58 +1641,93 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { } bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { - LoadSDNode *LD = cast(N); - ISD::MemIndexedMode AM = LD->getAddressingMode(); - if (AM == ISD::UNINDEXED) - return false; - EVT LoadedVT = LD->getMemoryVT(); - if (!LoadedVT.isVector()) - return false; - bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; - SDValue Offset; - bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + EVT LoadedVT; unsigned Opcode = 0; - unsigned Align = LD->getAlignment(); - bool IsLE = Subtarget->isLittle(); + bool isSExtLd, isPre; + unsigned Align; + ARMVCC::VPTCodes Pred; + SDValue PredReg; + SDValue Chain, Base, Offset; + + if (LoadSDNode *LD = dyn_cast(N)) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return false; + LoadedVT = LD->getMemoryVT(); + if (!LoadedVT.isVector()) + return false; + + Chain = LD->getChain(); + Base = LD->getBasePtr(); + Offset = LD->getOffset(); + Align = LD->getAlignment(); + isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + Pred = ARMVCC::None; + PredReg = CurDAG->getRegister(0, MVT::i32); + } else if (MaskedLoadSDNode *LD = dyn_cast(N)) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return false; + LoadedVT = LD->getMemoryVT(); + if (!LoadedVT.isVector()) + return false; + Chain = LD->getChain(); + Base = LD->getBasePtr(); + Offset = LD->getOffset(); + Align = LD->getAlignment(); + isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + Pred = ARMVCC::Then; + PredReg = LD->getMask(); + } else + llvm_unreachable("Expected a Load or a Masked Load!"); + + // We allow LE non-masked loads to change the type (for example use a vldrb.8 + // as opposed to a vldrw.32). This can allow extra addressing modes or + // alignments for what is otherwise an equivalent instruction. + bool CanChangeType = Subtarget->isLittle() && !isa(N); + + SDValue NewOffset; if (Align >= 2 && LoadedVT == MVT::v4i16 && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) { + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) { if (isSExtLd) Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post; else Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post; } else if (LoadedVT == MVT::v8i8 && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { if (isSExtLd) Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post; else Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post; } else if (LoadedVT == MVT::v4i8 && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { if (isSExtLd) Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post; else Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post; } else if (Align >= 4 && - (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2)) + (CanChangeType || LoadedVT == MVT::v4i32 || + LoadedVT == MVT::v4f32) && + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2)) Opcode = isPre ? 
ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; else if (Align >= 2 && - (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) + (CanChangeType || LoadedVT == MVT::v8i16 || + LoadedVT == MVT::v8f16) && + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; - else if ((IsLE || LoadedVT == MVT::v16i8) && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) + else if ((CanChangeType || LoadedVT == MVT::v16i8) && + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; else return false; - SDValue Chain = LD->getChain(); - SDValue Base = LD->getBasePtr(); - SDValue Ops[] = {Base, Offset, - CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32), - CurDAG->getRegister(0, MVT::i32), Chain}; - SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0), + SDValue Ops[] = {Base, NewOffset, + CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg, + Chain}; + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0), MVT::i32, MVT::Other, Ops); transferMemOperands(N, New); ReplaceUses(SDValue(N, 0), SDValue(New, 1)); @@ -3292,6 +3343,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } + case ISD::MLOAD: + if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) + return; + // Other cases are autogenerated. + break; case ARMISD::WLS: case ARMISD::LE: { SDValue Ops[] = { N->getOperand(1), diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e359756b7bf45..3dcddd73f309d 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -209,6 +209,9 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, VT != MVT::v2i64 && VT != MVT::v1i64) for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); + if (!VT.isFloatingPoint()) + for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}) + setOperationAction(Opcode, VT, Legal); } void ARMTargetLowering::addDRTypeForNEON(MVT VT) { @@ -296,6 +299,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, VT, Legal); setIndexedStoreAction(im, VT, Legal); + setIndexedMaskedLoadAction(im, VT, Legal); + setIndexedMaskedStoreAction(im, VT, Legal); } } @@ -322,6 +327,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, VT, Legal); setIndexedStoreAction(im, VT, Legal); + setIndexedMaskedLoadAction(im, VT, Legal); + setIndexedMaskedStoreAction(im, VT, Legal); } if (HasMVEFP) { @@ -374,12 +381,12 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { // Pre and Post inc on these are legal, given the correct extends for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { - setIndexedLoadAction(im, MVT::v8i8, Legal); - setIndexedStoreAction(im, MVT::v8i8, Legal); - setIndexedLoadAction(im, MVT::v4i8, Legal); - setIndexedStoreAction(im, MVT::v4i8, Legal); - setIndexedLoadAction(im, MVT::v4i16, Legal); - setIndexedStoreAction(im, MVT::v4i16, Legal); + for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) { + setIndexedLoadAction(im, VT, Legal); + setIndexedStoreAction(im, VT, Legal); + setIndexedMaskedLoadAction(im, VT, 
Legal); + setIndexedMaskedStoreAction(im, VT, Legal); + } } // Predicate types @@ -5572,15 +5579,9 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { Register ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, const MachineFunction &MF) const { Register Reg = StringSwitch(RegName) - .Case("r6", ARM::R6) - .Case("r7", ARM::R7) - .Case("r8", ARM::R8) - .Case("r9", ARM::R9) - .Case("r10", ARM::R10) - .Case("r11", ARM::R11) - .Case("sp", ARM::SP) - .Default(ARM::NoRegister); - if (Reg != ARM::NoRegister) + .Case("sp", ARM::SP) + .Default(0); + if (Reg) return Reg; report_fatal_error(Twine("Invalid register name \"" + StringRef(RegName) + "\".")); @@ -8992,6 +8993,12 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) { ST->getMemOperand()); } +static bool isZeroVector(SDValue N) { + return (ISD::isBuildVectorAllZeros(N.getNode()) || + (N->getOpcode() == ARMISD::VMOVIMM && + isNullConstant(N->getOperand(0)))); +} + static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) { MaskedLoadSDNode *N = cast(Op.getNode()); MVT VT = Op.getSimpleValueType(); @@ -8999,13 +9006,7 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) { SDValue PassThru = N->getPassThru(); SDLoc dl(Op); - auto IsZero = [](SDValue PassThru) { - return (ISD::isBuildVectorAllZeros(PassThru.getNode()) || - (PassThru->getOpcode() == ARMISD::VMOVIMM && - isNullConstant(PassThru->getOperand(0)))); - }; - - if (IsZero(PassThru)) + if (isZeroVector(PassThru)) return Op; // MVE Masked loads use zero as the passthru value. Here we convert undef to @@ -9013,12 +9014,13 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) { SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT, DAG.getTargetConstant(0, dl, MVT::i32)); SDValue NewLoad = DAG.getMaskedLoad( - VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(), - N->getMemOperand(), N->getExtensionType(), N->isExpandingLoad()); + VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec, + N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + N->getExtensionType(), N->isExpandingLoad()); SDValue Combo = NewLoad; if (!PassThru.isUndef() && (PassThru.getOpcode() != ISD::BITCAST || - !IsZero(PassThru->getOperand(0)))) + !isZeroVector(PassThru->getOperand(0)))) Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru); return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl); } @@ -12741,6 +12743,39 @@ PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return SDValue(); } +static SDValue PerformVCMPCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasMVEIntegerOps()) + return SDValue(); + + EVT VT = N->getValueType(0); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + ARMCC::CondCodes Cond = + (ARMCC::CondCodes)cast(N->getOperand(2))->getZExtValue(); + SDLoc dl(N); + + // vcmp X, 0, cc -> vcmpz X, cc + if (isZeroVector(Op1)) + return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, + N->getOperand(2)); + + unsigned SwappedCond = getSwappedCondition(Cond); + if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) { + // vcmp 0, X, cc -> vcmpz X, reversed(cc) + if (isZeroVector(Op0)) + return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1, + DCI.DAG.getConstant(SwappedCond, dl, MVT::i32)); + // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc) + if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP) + return DCI.DAG.getNode(ARMISD::VCMP, dl, VT, 
Op1, Op0, + DCI.DAG.getConstant(SwappedCond, dl, MVT::i32)); + } + + return SDValue(); +} + /// PerformInsertEltCombine - Target-specific dag combine xforms for /// ISD::INSERT_VECTOR_ELT. static SDValue PerformInsertEltCombine(SDNode *N, @@ -14421,6 +14456,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return PerformARMBUILD_VECTORCombine(N, DCI); case ARMISD::PREDICATE_CAST: return PerformPREDICATE_CASTCombine(N, DCI); + case ARMISD::VCMP: + return PerformVCMPCombine(N, DCI, Subtarget); case ARMISD::SMULWB: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); @@ -15192,14 +15229,19 @@ static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, } static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, - bool isSEXTLoad, bool isLE, SDValue &Base, - SDValue &Offset, bool &isInc, - SelectionDAG &DAG) { + bool isSEXTLoad, bool IsMasked, bool isLE, + SDValue &Base, SDValue &Offset, + bool &isInc, SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; if (!isa(Ptr->getOperand(1))) return false; + // We allow LE non-masked loads to change the type (for example use a vldrb.8 + // as opposed to a vldrw.32). This can allow extra addressing modes or + // alignments for what is otherwise an equivalent instruction. + bool CanChangeType = isLE && !IsMasked; + ConstantSDNode *RHS = cast(Ptr->getOperand(1)); int RHSC = (int)RHS->getZExtValue(); @@ -15218,7 +15260,7 @@ static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, }; // Try to find a matching instruction based on s/zext, Alignment, Offset and - // (in BE) type. + // (in BE/masked) type. Base = Ptr->getOperand(0); if (VT == MVT::v4i16) { if (Align >= 2 && IsInRange(RHSC, 0x80, 2)) @@ -15226,13 +15268,15 @@ static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, } else if (VT == MVT::v4i8 || VT == MVT::v8i8) { if (IsInRange(RHSC, 0x80, 1)) return true; - } else if (Align >= 4 && (isLE || VT == MVT::v4i32 || VT == MVT::v4f32) && + } else if (Align >= 4 && + (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) && IsInRange(RHSC, 0x80, 4)) return true; - else if (Align >= 2 && (isLE || VT == MVT::v8i16 || VT == MVT::v8f16) && + else if (Align >= 2 && + (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) && IsInRange(RHSC, 0x80, 2)) return true; - else if ((isLE || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1)) + else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1)) return true; return false; } @@ -15252,6 +15296,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue Ptr; unsigned Align; bool isSEXTLoad = false; + bool IsMasked = false; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); @@ -15261,6 +15306,17 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); Align = ST->getAlignment(); + } else if (MaskedLoadSDNode *LD = dyn_cast(N)) { + Ptr = LD->getBasePtr(); + VT = LD->getMemoryVT(); + Align = LD->getAlignment(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast(N)) { + Ptr = ST->getBasePtr(); + VT = ST->getMemoryVT(); + Align = ST->getAlignment(); + IsMasked = true; } else return false; @@ -15269,8 +15325,8 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (VT.isVector()) isLegal = Subtarget->hasMVEIntegerOps() && 
getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad, - Subtarget->isLittle(), Base, Offset, - isInc, DAG); + IsMasked, Subtarget->isLittle(), Base, + Offset, isInc, DAG); else { if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, @@ -15298,6 +15354,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue Ptr; unsigned Align; bool isSEXTLoad = false, isNonExt; + bool IsMasked = false; if (LoadSDNode *LD = dyn_cast(N)) { VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); @@ -15309,6 +15366,19 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, Ptr = ST->getBasePtr(); Align = ST->getAlignment(); isNonExt = !ST->isTruncatingStore(); + } else if (MaskedLoadSDNode *LD = dyn_cast(N)) { + VT = LD->getMemoryVT(); + Ptr = LD->getBasePtr(); + Align = LD->getAlignment(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD; + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast(N)) { + VT = ST->getMemoryVT(); + Ptr = ST->getBasePtr(); + Align = ST->getAlignment(); + isNonExt = !ST->isTruncatingStore(); + IsMasked = true; } else return false; @@ -15332,7 +15402,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isLegal = false; if (VT.isVector()) isLegal = Subtarget->hasMVEIntegerOps() && - getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, + getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, IsMasked, Subtarget->isLittle(), Base, Offset, isInc, DAG); else { diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index fe696222ec70a..155e0efff1a8a 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -297,6 +297,28 @@ class RegConstraint { string Constraints = C; } +// ARMCC condition codes. See ARMCC::CondCodes +def ARMCCeq : PatLeaf<(i32 0)>; +def ARMCCne : PatLeaf<(i32 1)>; +def ARMCChs : PatLeaf<(i32 2)>; +def ARMCClo : PatLeaf<(i32 3)>; +def ARMCCmi : PatLeaf<(i32 4)>; +def ARMCCpl : PatLeaf<(i32 5)>; +def ARMCCvs : PatLeaf<(i32 6)>; +def ARMCCvc : PatLeaf<(i32 7)>; +def ARMCChi : PatLeaf<(i32 8)>; +def ARMCCls : PatLeaf<(i32 9)>; +def ARMCCge : PatLeaf<(i32 10)>; +def ARMCClt : PatLeaf<(i32 11)>; +def ARMCCgt : PatLeaf<(i32 12)>; +def ARMCCle : PatLeaf<(i32 13)>; +def ARMCCal : PatLeaf<(i32 14)>; + +// VCC predicates. See ARMVCC::VPTCodes +def ARMVCCNone : PatLeaf<(i32 0)>; +def ARMVCCThen : PatLeaf<(i32 1)>; +def ARMVCCElse : PatLeaf<(i32 2)>; + //===----------------------------------------------------------------------===// // ARM specific transformation functions and pattern fragments. 
// diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 429d0a1cf1bdf..c81e60b3360a2 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1015,6 +1015,16 @@ let Predicates = [HasMVEFloat] in { (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>; def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))), (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; + def : Pat<(v4f32 (int_arm_mve_max_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), + (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))), + (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), + ARMVCCThen, (v4i1 VCCR:$mask), + (v4f32 MQPR:$inactive)))>; + def : Pat<(v8f16 (int_arm_mve_max_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), + (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))), + (v8f16 (MVE_VMAXNMf32 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), + ARMVCCThen, (v8i1 VCCR:$mask), + (v8f16 MQPR:$inactive)))>; } def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>; @@ -1025,6 +1035,16 @@ let Predicates = [HasMVEFloat] in { (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>; def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))), (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; + def : Pat<(v4f32 (int_arm_mve_min_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), + (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))), + (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), + ARMVCCThen, (v4i1 VCCR:$mask), + (v4f32 MQPR:$inactive)))>; + def : Pat<(v8f16 (int_arm_mve_min_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), + (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))), + (v8f16 (MVE_VMINNMf32 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), + ARMVCCThen, (v8i1 VCCR:$mask), + (v8f16 MQPR:$inactive)))>; } @@ -1042,48 +1062,45 @@ class MVE_VMINMAX size, let Inst{4} = bit_4; } -multiclass MVE_VMINMAX_all_sizes { - def s8 : MVE_VMINMAX; - def s16 : MVE_VMINMAX; - def s32 : MVE_VMINMAX; - def u8 : MVE_VMINMAX; - def u16 : MVE_VMINMAX; - def u32 : MVE_VMINMAX; -} +multiclass MVE_VMINMAX_m { + def "" : MVE_VMINMAX; -defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>; -defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>; + let Predicates = [HasMVEInt] in { + // Unpredicated min/max + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VMINs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - - def : Pat<(v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - - def : Pat<(v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : 
Pat<(v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - - def : Pat<(v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + // Predicated min/max + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } } +multiclass MVE_VMAX + : MVE_VMINMAX_m<"vmax", 0b0, VTI, !if(VTI.Unsigned, umax, smax), int_arm_mve_max_predicated>; +multiclass MVE_VMIN + : MVE_VMINMAX_m<"vmin", 0b1, VTI, !if(VTI.Unsigned, umin, smin), int_arm_mve_min_predicated>; + +defm MVE_VMINs8 : MVE_VMIN; +defm MVE_VMINs16 : MVE_VMIN; +defm MVE_VMINs32 : MVE_VMIN; +defm MVE_VMINu8 : MVE_VMIN; +defm MVE_VMINu16 : MVE_VMIN; +defm MVE_VMINu32 : MVE_VMIN; + +defm MVE_VMAXs8 : MVE_VMAX; +defm MVE_VMAXs16 : MVE_VMAX; +defm MVE_VMAXs32 : MVE_VMAX; +defm MVE_VMAXu8 : MVE_VMAX; +defm MVE_VMAXu16 : MVE_VMAX; +defm MVE_VMAXu32 : MVE_VMAX; + // end of mve_comp instructions // start of mve_bit instructions @@ -1233,53 +1250,61 @@ foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>; } -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), - (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; - - def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), - (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; - - def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), - (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; - - def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))), - (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (vnotq MQPR:$val2))), - (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))), - (v4i32 (MVE_VBIC 
(v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))), - (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; - - def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))), - (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))), - (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))), - (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))), - (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; +multiclass MVE_bit_op { + let Predicates = [HasMVEInt] in { + // Unpredicated operation + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + // Predicated operation + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (instruction + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } } +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; + +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; + +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; + +multiclass MVE_bit_op_with_inv { + let Predicates = [HasMVEInt] in { + // Unpredicated operation + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (vnotq (VTI.Vec MQPR:$Qn)))), + (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + // Predicated operation + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (instruction + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } +} + +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; + +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; + class MVE_bit_cmode cmode, dag inOps> : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary, iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> { @@ -1512,8 +1537,9 @@ class MVE_int size, list pattern=[]> let Inst{3-1} = Qm{2-0}; } -class MVE_VMULt1 size, list pattern=[]> - : MVE_int<"vmul", suffix, size, pattern> { +class MVE_VMULt1 size, + list pattern=[]> + : MVE_int { let Inst{28} = 0b0; let Inst{25-23} = 0b110; @@ -1524,19 +1550,33 @@ class MVE_VMULt1 size, list pattern=[]> let validForTailPredication = 1; } -def MVE_VMULt1i8 : MVE_VMULt1<"i8", 0b00>; -def MVE_VMULt1i16 : MVE_VMULt1<"i16", 0b01>; -def MVE_VMULt1i32 : MVE_VMULt1<"i32", 0b10>; +multiclass MVE_VMUL_m { + def "" : MVE_VMULt1; -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VMULt1i8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VMULt1i16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VMULt1i32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + let Predicates = [HasMVEInt] in { + // Unpredicated multiply + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec 
MQPR:$Qn)))>; + + // Predicated multiply + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } } +multiclass MVE_VMUL + : MVE_VMUL_m<"vmul", VTI, mul, int_arm_mve_mul_predicated>; + +defm MVE_VMULi8 : MVE_VMUL; +defm MVE_VMULi16 : MVE_VMUL; +defm MVE_VMULi32 : MVE_VMUL; + class MVE_VQxDMULH size, bit rounding, list pattern=[]> : MVE_int { @@ -1590,7 +1630,7 @@ multiclass MVE_VADDSUB_m(NAME) (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - (i32 1), (VTI.Pred VCCR:$mask), + ARMVCCThen, (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))>; } } @@ -1664,7 +1704,8 @@ let Predicates = [HasMVEInt] in { } -class MVE_VABD_int size, list pattern=[]> +class MVE_VABD_int size, + list pattern=[]> : MVE_int<"vabd", suffix, size, pattern> { let Inst{28} = U; @@ -1676,12 +1717,35 @@ class MVE_VABD_int size, list pattern=[]> let validForTailPredication = 1; } -def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>; -def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>; -def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>; -def MVE_VABDu8 : MVE_VABD_int<"u8", 0b1, 0b00>; -def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>; -def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>; +multiclass MVE_VABD_m { + def "" : MVE_VABD_int; + + let Predicates = [HasMVEInt] in { + // Unpredicated absolute difference + def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + + // Predicated absolute difference + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } +} + +multiclass MVE_VABD + : MVE_VABD_m; + +defm MVE_VABDs8 : MVE_VABD; +defm MVE_VABDs16 : MVE_VABD; +defm MVE_VABDs32 : MVE_VABD; +defm MVE_VABDu8 : MVE_VABD; +defm MVE_VABDu16 : MVE_VABD; +defm MVE_VABDu32 : MVE_VABD; class MVE_VRHADD size, list pattern=[]> : MVE_int<"vrhadd", suffix, size, pattern> { @@ -1738,60 +1802,6 @@ def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>; def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>; def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>; -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (ARMvshrsImm - (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), - (v16i8 (MVE_VHADDs8 - (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; - def : Pat<(v8i16 (ARMvshrsImm - (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), - (v8i16 (MVE_VHADDs16 - (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; - def : Pat<(v4i32 (ARMvshrsImm - (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), - (v4i32 (MVE_VHADDs32 - (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; - - def : Pat<(v16i8 (ARMvshruImm - (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), - (v16i8 (MVE_VHADDu8 - (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; - def : Pat<(v8i16 (ARMvshruImm - (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), - (v8i16 (MVE_VHADDu16 - (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; - def : Pat<(v4i32 (ARMvshruImm - (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), - (v4i32 (MVE_VHADDu32 - (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; - - def : Pat<(v16i8 (ARMvshrsImm - (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), - (v16i8 (MVE_VHSUBs8 - (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; - def : Pat<(v8i16 (ARMvshrsImm - (v8i16 (sub 
(v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), - (v8i16 (MVE_VHSUBs16 - (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; - def : Pat<(v4i32 (ARMvshrsImm - (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), - (v4i32 (MVE_VHSUBs32 - (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; - - def : Pat<(v16i8 (ARMvshruImm - (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), - (v16i8 (MVE_VHSUBu8 - (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; - def : Pat<(v8i16 (ARMvshruImm - (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), - (v8i16 (MVE_VHSUBu16 - (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; - def : Pat<(v4i32 (ARMvshruImm - (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), - (v4i32 (MVE_VHSUBu32 - (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; -} - class MVE_VDUP pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary, "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> { @@ -1969,17 +1979,17 @@ multiclass vqabsneg_pattern 0 ? r : (r == INT_MIN ? INT_MAX : -r) def : Pat<(VTI.Vec (vselect - (VTI.Pred (ARMvcmpz (VTI.Vec MQPR:$reg), (i32 12))), + (VTI.Pred (ARMvcmpz (VTI.Vec MQPR:$reg), ARMCCgt)), (VTI.Vec MQPR:$reg), (VTI.Vec (vselect - (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, (i32 0))), + (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)), int_max, (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))))), (VTI.Vec (vqabs_instruction (VTI.Vec MQPR:$reg)))>; // Similarly, this tree represents vqneg, i.e. the following vectorized expression: // r == INT_MIN ? INT_MAX : -r def : Pat<(VTI.Vec (vselect - (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, (i32 0))), + (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)), int_max, (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))), (VTI.Vec (vqneg_instruction (VTI.Vec MQPR:$reg)))>; @@ -2781,8 +2791,8 @@ class MVEFloatArithNeon pattern=[]> - : MVEFloatArithNeon<"vmul", suffix, size, (outs MQPR:$Qd), +class MVE_VMUL_fp pattern=[]> + : MVEFloatArithNeon { bits<4> Qd; @@ -2800,16 +2810,29 @@ class MVE_VMUL_fp pattern=[]> let validForTailPredication = 1; } -def MVE_VMULf32 : MVE_VMUL_fp<"f32", 0b0>; -def MVE_VMULf16 : MVE_VMUL_fp<"f16", 0b1>; +multiclass MVE_VMULT_fp_m { + def "" : MVE_VMUL_fp; -let Predicates = [HasMVEFloat] in { - def : Pat<(v4f32 (fmul (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), - (v4f32 (MVE_VMULf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>; - def : Pat<(v8f16 (fmul (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))), - (v8f16 (MVE_VMULf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; + let Predicates = [HasMVEFloat] in { + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } } +multiclass MVE_VMUL_fp_m + : MVE_VMULT_fp_m<"vmul", 0, VTI, fmul, int_arm_mve_mul_predicated>; + +defm MVE_VMULf32 : MVE_VMUL_fp_m; +defm MVE_VMULf16 : MVE_VMUL_fp_m; + class MVE_VCMLA pattern=[]> : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot), @@ -2890,7 +2913,7 @@ multiclass MVE_VADDSUB_fp_m(NAME) (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - (i32 1), (VTI.Pred VCCR:$mask), + ARMVCCThen, (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))>; } } @@ -2950,8 +2973,28 @@ class MVE_VABD_fp let validForTailPredication = 1; } -def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>; -def 
MVE_VABDf16 : MVE_VABD_fp<"f16", 0b1>; +multiclass MVE_VABDT_fp_m { + def "" : MVE_VABD_fp; + + let Predicates = [HasMVEFloat] in { + def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } +} + +multiclass MVE_VABD_fp_m + : MVE_VABDT_fp_m; + +defm MVE_VABDf32 : MVE_VABD_fp_m; +defm MVE_VABDf16 : MVE_VABD_fp_m; class MVE_VCVT_fix pattern=[]> @@ -3303,155 +3346,120 @@ def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>; def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>; def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>; -multiclass unpred_vcmp_z { - def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))), +multiclass unpred_vcmp_z { + def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)), (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>; - def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))), + def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)), (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>; - def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))), + def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)), (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>; - def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))))), - (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))), - (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))))), - (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>; } -multiclass unpred_vcmp_r { - def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))), +multiclass unpred_vcmp_r { + def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)), (v16i1 (!cast("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>; - def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))), + def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)), (v8i1 (!cast("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>; - def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))), + def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)), (v4i1 (!cast("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>; - def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))), + def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)), (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>; - def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), 
(v8i16 (ARMvdup GPR:$v2)), (i32 fc))), + def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)), (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>; - def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))), + def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)), (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>; - def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))), - (v16i1 (!cast("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))), - (v8i1 (!cast("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))), - (v4i1 (!cast("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>; - - def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))), - (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))), - (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))))), - (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; -} - -multiclass unpred_vcmp_r_reversible { - defm "": unpred_vcmp_r; - - // Additional patterns that match the vector/scalar comparisons the - // opposite way round, with the ARMvdup in the first operand of the - // ARMvcmp. These will usually need a different condition code - // (except for the symmetric conditions EQ and NE). They're in a - // separate multiclass because the unsigned CS and HI comparisons - // don't have reversed forms. 
- - def : Pat<(v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))), - (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed))>; - def : Pat<(v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))), - (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed))>; - def : Pat<(v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))), - (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed))>; - - def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))))), - (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))))), - (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))))), - (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>; -} - -multiclass unpred_vcmpf_z { - def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))), + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>; + + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>; +} + +multiclass unpred_vcmpf_z { + def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)), (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>; - def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))), + def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)), (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))), - (v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))), - (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)))), + (v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>; } -multiclass unpred_vcmpf_r { - def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))), +multiclass 
unpred_vcmpf_r { + def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)), (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>; - def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))), + def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)), (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>; - def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))), + def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)), (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>; - def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))), + def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)), (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>; - def : Pat<(v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))), - (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed))>; - def : Pat<(v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))), - (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed))>; - - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))), - (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))), - (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)))), + (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)))), + (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))), - (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))), - (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; - - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))))), - (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))))), - (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>; } let Predicates = [HasMVEInt] in { - defm MVE_VCEQZ : unpred_vcmp_z<"i", 0>; - defm MVE_VCNEZ : unpred_vcmp_z<"i", 1>; - defm MVE_VCGEZ : unpred_vcmp_z<"s", 10>; - defm MVE_VCLTZ : unpred_vcmp_z<"s", 11>; - defm 
MVE_VCGTZ : unpred_vcmp_z<"s", 12>; - defm MVE_VCLEZ : unpred_vcmp_z<"s", 13>; - defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>; - defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>; - - defm MVE_VCEQ : unpred_vcmp_r_reversible<"i", 0, 0>; - defm MVE_VCNE : unpred_vcmp_r_reversible<"i", 1, 1>; - defm MVE_VCGE : unpred_vcmp_r_reversible<"s", 10, 13>; - defm MVE_VCLT : unpred_vcmp_r_reversible<"s", 11, 12>; - defm MVE_VCGT : unpred_vcmp_r_reversible<"s", 12, 11>; - defm MVE_VCLE : unpred_vcmp_r_reversible<"s", 13, 10>; - defm MVE_VCGTU : unpred_vcmp_r<"u", 8>; - defm MVE_VCGEU : unpred_vcmp_r<"u", 2>; + defm MVE_VCEQZ : unpred_vcmp_z<"i", ARMCCeq>; + defm MVE_VCNEZ : unpred_vcmp_z<"i", ARMCCne>; + defm MVE_VCGEZ : unpred_vcmp_z<"s", ARMCCge>; + defm MVE_VCLTZ : unpred_vcmp_z<"s", ARMCClt>; + defm MVE_VCGTZ : unpred_vcmp_z<"s", ARMCCgt>; + defm MVE_VCLEZ : unpred_vcmp_z<"s", ARMCCle>; + defm MVE_VCGTUZ : unpred_vcmp_z<"u", ARMCChi>; + defm MVE_VCGEUZ : unpred_vcmp_z<"u", ARMCChs>; + + defm MVE_VCEQ : unpred_vcmp_r<"i", ARMCCeq>; + defm MVE_VCNE : unpred_vcmp_r<"i", ARMCCne>; + defm MVE_VCGE : unpred_vcmp_r<"s", ARMCCge>; + defm MVE_VCLT : unpred_vcmp_r<"s", ARMCClt>; + defm MVE_VCGT : unpred_vcmp_r<"s", ARMCCgt>; + defm MVE_VCLE : unpred_vcmp_r<"s", ARMCCle>; + defm MVE_VCGTU : unpred_vcmp_r<"u", ARMCChi>; + defm MVE_VCGEU : unpred_vcmp_r<"u", ARMCChs>; } let Predicates = [HasMVEFloat] in { - defm MVE_VFCEQZ : unpred_vcmpf_z<0>; - defm MVE_VFCNEZ : unpred_vcmpf_z<1>; - defm MVE_VFCGEZ : unpred_vcmpf_z<10>; - defm MVE_VFCLTZ : unpred_vcmpf_z<11>; - defm MVE_VFCGTZ : unpred_vcmpf_z<12>; - defm MVE_VFCLEZ : unpred_vcmpf_z<13>; + defm MVE_VFCEQZ : unpred_vcmpf_z; + defm MVE_VFCNEZ : unpred_vcmpf_z; + defm MVE_VFCGEZ : unpred_vcmpf_z; + defm MVE_VFCLTZ : unpred_vcmpf_z; + defm MVE_VFCGTZ : unpred_vcmpf_z; + defm MVE_VFCLEZ : unpred_vcmpf_z; - defm MVE_VFCEQ : unpred_vcmpf_r<0, 0>; - defm MVE_VFCNE : unpred_vcmpf_r<1, 1>; - defm MVE_VFCGE : unpred_vcmpf_r<10, 13>; - defm MVE_VFCLT : unpred_vcmpf_r<11, 12>; - defm MVE_VFCGT : unpred_vcmpf_r<12, 11>; - defm MVE_VFCLE : unpred_vcmpf_r<13, 10>; + defm MVE_VFCEQ : unpred_vcmpf_r; + defm MVE_VFCNE : unpred_vcmpf_r; + defm MVE_VFCGE : unpred_vcmpf_r; + defm MVE_VFCLT : unpred_vcmpf_r; + defm MVE_VFCGT : unpred_vcmpf_r; + defm MVE_VFCLE : unpred_vcmpf_r; } @@ -3615,8 +3623,8 @@ defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10, "@earlyclobber $Q defm MVE_VMULLp8 : MVE_VMULL_multi<"vmull", "p8", 0b0, 0b11>; defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>; -class MVE_VxMULH size, - bit round, list pattern=[]> +class MVE_VxMULH size, bit round, + list pattern=[]> : MVE_qDest_qSrc { @@ -3632,19 +3640,45 @@ class MVE_VxMULH size, let Inst{0} = 0b1; } -def MVE_VMULHs8 : MVE_VxMULH<"vmulh", "s8", 0b0, 0b00, 0b0>; -def MVE_VMULHs16 : MVE_VxMULH<"vmulh", "s16", 0b0, 0b01, 0b0>; -def MVE_VMULHs32 : MVE_VxMULH<"vmulh", "s32", 0b0, 0b10, 0b0>; -def MVE_VMULHu8 : MVE_VxMULH<"vmulh", "u8", 0b1, 0b00, 0b0>; -def MVE_VMULHu16 : MVE_VxMULH<"vmulh", "u16", 0b1, 0b01, 0b0>; -def MVE_VMULHu32 : MVE_VxMULH<"vmulh", "u32", 0b1, 0b10, 0b0>; +multiclass MVE_VxMULH_m { + def "" : MVE_VxMULH; -def MVE_VRMULHs8 : MVE_VxMULH<"vrmulh", "s8", 0b0, 0b00, 0b1>; -def MVE_VRMULHs16 : MVE_VxMULH<"vrmulh", "s16", 0b0, 0b01, 0b1>; -def MVE_VRMULHs32 : MVE_VxMULH<"vrmulh", "s32", 0b0, 0b10, 0b1>; -def MVE_VRMULHu8 : MVE_VxMULH<"vrmulh", "u8", 0b1, 0b00, 0b1>; -def MVE_VRMULHu16 : MVE_VxMULH<"vrmulh", "u16", 0b1, 0b01, 0b1>; -def MVE_VRMULHu32 : MVE_VxMULH<"vrmulh", "u32", 0b1, 0b10, 
0b1>; + let Predicates = [HasMVEInt] in { + // Unpredicated multiply returning high bits + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + + // Predicated multiply returning high bits + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } +} + +multiclass MVE_VMULT + : MVE_VxMULH_m; + +defm MVE_VMULHs8 : MVE_VMULT<"vmulh", MVE_v16s8, 0b0>; +defm MVE_VMULHs16 : MVE_VMULT<"vmulh", MVE_v8s16, 0b0>; +defm MVE_VMULHs32 : MVE_VMULT<"vmulh", MVE_v4s32, 0b0>; +defm MVE_VMULHu8 : MVE_VMULT<"vmulh", MVE_v16u8, 0b0>; +defm MVE_VMULHu16 : MVE_VMULT<"vmulh", MVE_v8u16, 0b0>; +defm MVE_VMULHu32 : MVE_VMULT<"vmulh", MVE_v4u32, 0b0>; + +defm MVE_VRMULHs8 : MVE_VMULT<"vrmulh", MVE_v16s8, 0b1>; +defm MVE_VRMULHs16 : MVE_VMULT<"vrmulh", MVE_v8s16, 0b1>; +defm MVE_VRMULHs32 : MVE_VMULT<"vrmulh", MVE_v4s32, 0b1>; +defm MVE_VRMULHu8 : MVE_VMULT<"vrmulh", MVE_v16u8, 0b1>; +defm MVE_VRMULHu16 : MVE_VMULT<"vrmulh", MVE_v8u16, 0b1>; +defm MVE_VRMULHu32 : MVE_VMULT<"vrmulh", MVE_v4u32, 0b1>; class MVE_VxMOVxN size, bit T, list pattern=[]> @@ -3716,7 +3750,7 @@ multiclass MVE_VCVT_f2h_m { (v4i1 VCCR:$mask))), (v8f16 (!cast(NAME) (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), - (i32 1), (v4i1 VCCR:$mask)))>; + ARMVCCThen, (v4i1 VCCR:$mask)))>; } } @@ -4224,7 +4258,7 @@ def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>; def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; let hasSideEffects = 1 in -class MVE_VCTP size, list pattern=[]> +class MVE_VCTPInst size, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, "$Rn", vpred_n, "", pattern> { bits<4> Rn; @@ -4242,20 +4276,22 @@ class MVE_VCTP size, list pattern=[]> let validForTailPredication = 1; } -def MVE_VCTP8 : MVE_VCTP<"8", 0b00>; -def MVE_VCTP16 : MVE_VCTP<"16", 0b01>; -def MVE_VCTP32 : MVE_VCTP<"32", 0b10>; -def MVE_VCTP64 : MVE_VCTP<"64", 0b11>; +multiclass MVE_VCTP { + def "": MVE_VCTPInst; -let Predicates = [HasMVEInt] in { - def : Pat<(int_arm_vctp8 rGPR:$Rn), - (v16i1 (MVE_VCTP8 rGPR:$Rn))>; - def : Pat<(int_arm_vctp16 rGPR:$Rn), - (v8i1 (MVE_VCTP16 rGPR:$Rn))>; - def : Pat<(int_arm_vctp32 rGPR:$Rn), - (v4i1 (MVE_VCTP32 rGPR:$Rn))>; + let Predicates = [HasMVEInt] in { + def : Pat<(intr rGPR:$Rn), + (VTI.Pred (!cast(NAME) rGPR:$Rn))>; + def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)), + (VTI.Pred (!cast(NAME) rGPR:$Rn, ARMVCCThen, VCCR:$mask))>; + } } +defm MVE_VCTP8 : MVE_VCTP; +defm MVE_VCTP16 : MVE_VCTP; +defm MVE_VCTP32 : MVE_VCTP; +defm MVE_VCTP64 : MVE_VCTP; + // end of mve_qDest_rSrc // start of coproc mov @@ -4718,9 +4754,9 @@ multiclass MVE_VLDR_rq_w VTIs> { def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag)), (VTI.Vec (!cast(NAME) GPR:$base, MQPR:$offsets))>; def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag, (VTI.Pred VCCR:$pred))), - (VTI.Vec (!cast(NAME#"_u") GPR:$base, MQPR:$offsets, 1, VCCR:$pred))>; + (VTI.Vec (!cast(NAME#"_u") GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>; def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag, (VTI.Pred 
VCCR:$pred))), - (VTI.Vec (!cast(NAME) GPR:$base, MQPR:$offsets, 1, VCCR:$pred))>; + (VTI.Vec (!cast(NAME) GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>; } } multiclass MVE_VLDR_rq_b VTIs> { @@ -4730,7 +4766,7 @@ multiclass MVE_VLDR_rq_b VTIs> { def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned)), (VTI.Vec (!cast(NAME) GPR:$base, MQPR:$offsets))>; def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned, (VTI.Pred VCCR:$pred))), - (VTI.Vec (!cast(NAME) GPR:$base, MQPR:$offsets, 1, VCCR:$pred))>; + (VTI.Vec (!cast(NAME) GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>; } } multiclass MVE_VSTR_rq_w VTIs> { @@ -4742,9 +4778,9 @@ multiclass MVE_VSTR_rq_w VTIs> { def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift), (!cast(NAME) MQPR:$data, GPR:$base, MQPR:$offsets)>; def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0, (VTI.Pred VCCR:$pred)), - (!cast(NAME#"_u") MQPR:$data, GPR:$base, MQPR:$offsets, 1, VCCR:$pred)>; + (!cast(NAME#"_u") MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>; def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift, (VTI.Pred VCCR:$pred)), - (!cast(NAME) MQPR:$data, GPR:$base, MQPR:$offsets, 1, VCCR:$pred)>; + (!cast(NAME) MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>; } } multiclass MVE_VSTR_rq_b VTIs> { @@ -4754,7 +4790,7 @@ multiclass MVE_VSTR_rq_b VTIs> { def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0), (!cast(NAME) MQPR:$data, GPR:$base, MQPR:$offsets)>; def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0, (VTI.Pred VCCR:$pred)), - (!cast(NAME) MQPR:$data, GPR:$base, MQPR:$offsets, 1, VCCR:$pred)>; + (!cast(NAME) MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>; } } @@ -4835,7 +4871,7 @@ multiclass MVE_VLDR_qi(NAME) - (AVTI.Vec MQPR:$addr), (i32 imm:$offset), 1, VCCR:$pred))>; + (AVTI.Vec MQPR:$addr), (i32 imm:$offset), ARMVCCThen, VCCR:$pred))>; } } multiclass MVE_VSTR_qi(NAME) - (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset), 1, VCCR:$pred)>; + (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset), ARMVCCThen, VCCR:$pred)>; def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data))), (AVTI.Vec (!cast(NAME # "_pre") @@ -4859,7 +4895,7 @@ multiclass MVE_VSTR_qi(NAME # "_pre") - (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset), 1, VCCR:$pred))>; + (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), (i32 imm:$offset), ARMVCCThen, VCCR:$pred))>; } } @@ -5116,87 +5152,87 @@ def : MVEInstAlias<"vpsel${vp}." 
# suffix # "\t$Qd, $Qn, $Qm", let Predicates = [HasMVEInt] in { def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), - (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>; def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), - (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>; def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), - (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>; def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), - (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>; def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), - (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>; def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), - (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>; + (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, + (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), ARMCCne)))>; def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), - (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>; def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), - (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>; def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), - (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>; def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), - (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>; // Pred <-> Int def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))), - (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))), - (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))), - (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))), - (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))), - (v8i16 
(MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))), - (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))), - (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))), - (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))), - (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))), - (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, 1))>; + (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, ARMCCne))>; def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))), - (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, 1))>; + (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, ARMCCne))>; def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))), - (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, 1))>; + (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, ARMCCne))>; } let Predicates = [HasMVEFloat] in { // Pred <-> Float // 112 is 1.0 in float def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))), - (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>; + (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>; // 2620 in 1.0 in half def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))), - (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>; + (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>; // 240 is -1.0 in float def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))), - (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>; + (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>; // 2748 is -1.0 in half def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))), - (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>; + (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>; def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))), - (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>; + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>; def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))), - (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>; + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>; def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))), - (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>; + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>; def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))), - (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>; + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>; } def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary, @@ -5288,61 +5324,7 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> { // Patterns //===----------------------------------------------------------------------===// -class MVE_vector_store_typed - : Pat<(StoreKind (Ty MQPR:$val), 
t2addrmode_imm7:$addr), - (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr)>; -class MVE_vector_maskedstore_typed - : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr, VCCR:$pred), - (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr, (i32 1), VCCR:$pred)>; - -multiclass MVE_vector_store { - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; -} - -class MVE_vector_load_typed - : Pat<(Ty (LoadKind t2addrmode_imm7:$addr)), - (Ty (RegImmInst t2addrmode_imm7:$addr))>; -class MVE_vector_maskedload_typed - : Pat<(Ty (LoadKind t2addrmode_imm7:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))), - (Ty (RegImmInst t2addrmode_imm7:$addr, (i32 1), VCCR:$pred))>; - -multiclass MVE_vector_load { - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; -} - -class MVE_vector_offset_store_typed - : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), - (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; - -multiclass MVE_vector_offset_store { - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; -} +// PatFrags for loads and stores. Often trying to keep semi-consistent names. def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (pre_store node:$val, node:$ptr, node:$offset), [{ @@ -5362,77 +5344,249 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), }]>; -def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, node:$pred, node:$passthru), [{ +def aligned_maskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast(N); return Ld->getMemoryVT().getScalarType() == MVT::i8; }]>; -def sextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_sextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{ return cast(N)->getExtensionType() == ISD::SEXTLOAD; }]>; -def zextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_zextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{ return cast(N)->getExtensionType() == ISD::ZEXTLOAD; }]>; -def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_extmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{ auto *Ld = cast(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; }]>; -def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, node:$pred, node:$passthru), [{ +def aligned_maskedloadvi16: PatFrag<(ops node:$ptr, node:$pred, 
node:$passthru), + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2; }]>; -def sextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_sextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{ return cast(N)->getExtensionType() == ISD::SEXTLOAD; }]>; -def zextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_zextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{ return cast(N)->getExtensionType() == ISD::ZEXTLOAD; }]>; -def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_extmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{ auto *Ld = cast(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; }]>; -def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, node:$pred, node:$passthru), [{ +def aligned_maskedloadvi32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4; }]>; -def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ +def aligned_maskedstvi8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; -def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (maskedstore8 node:$val, node:$ptr, node:$pred), [{ - return cast(N)->isTruncatingStore(); +def aligned_maskedstvi16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; }]>; -def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ +def aligned_maskedstvi32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; +}]>; + +def pre_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask), + (masked_st node:$val, node:$base, node:$offset, node:$mask), [{ + ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); + return AM == ISD::PRE_INC || AM == ISD::PRE_DEC; +}]>; +def post_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask), + (masked_st node:$val, node:$base, node:$offset, node:$mask), [{ + ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); + return AM == ISD::POST_INC || AM == 
ISD::POST_DEC; +}]>; +def aligned_pre_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def aligned_post_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def aligned_pre_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; +def aligned_post_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ auto *St = cast(N); EVT ScalarVT = St->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; }]>; +def aligned_pre_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; +}]>; +def aligned_post_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; +}]>; + + +// PatFrags for "Aligned" extending / truncating + +def aligned_extloadvi8 : PatFrag<(ops node:$ptr), (extloadvi8 node:$ptr)>; +def aligned_sextloadvi8 : PatFrag<(ops node:$ptr), (sextloadvi8 node:$ptr)>; +def aligned_zextloadvi8 : PatFrag<(ops node:$ptr), (zextloadvi8 node:$ptr)>; + +def aligned_truncstvi8 : PatFrag<(ops node:$val, node:$ptr), + (truncstorevi8 node:$val, node:$ptr)>; +def aligned_post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncstvi8 node:$val, node:$base, node:$offset)>; +def aligned_pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncstvi8 node:$val, node:$base, node:$offset)>; -def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (maskedstore16 node:$val, node:$ptr, node:$pred), [{ +let MinAlignment = 2 in { + def aligned_extloadvi16 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>; + def aligned_sextloadvi16 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>; + def aligned_zextloadvi16 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>; + + def aligned_truncstvi16 : PatFrag<(ops node:$val, node:$ptr), + (truncstorevi16 node:$val, node:$ptr)>; + def aligned_post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncstvi16 node:$val, node:$base, node:$offset)>; + def aligned_pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncstvi16 node:$val, node:$base, node:$offset)>; +} + +def truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$pred), + (masked_st node:$val, node:$base, undef, node:$pred), [{ return cast(N)->isTruncatingStore(); }]>; -def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, 
node:$ptr, node:$pred), [{ +def aligned_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$pred), + (truncmaskedst node:$val, node:$base, node:$pred), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def aligned_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$pred), + (truncmaskedst node:$val, node:$base, node:$pred), [{ auto *St = cast(N); EVT ScalarVT = St->getMemoryVT().getScalarType(); - return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; +def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (masked_st node:$val, node:$base, node:$offset, node:$pred), [{ + ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); + return cast(N)->isTruncatingStore() && (AM == ISD::PRE_INC || AM == ISD::PRE_DEC); +}]>; +def aligned_pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def aligned_pre_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; +def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (masked_st node:$val, node:$base, node:$offset, node:$postd), [{ + ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); + return cast(N)->isTruncatingStore() && (AM == ISD::POST_INC || AM == ISD::POST_DEC); +}]>; +def aligned_post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; +def aligned_post_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; + +// Load/store patterns + +class MVE_vector_store_typed + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr), + (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr)>; + +class MVE_vector_maskedstore_typed + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr, VCCR:$pred), + (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr, ARMVCCThen, VCCR:$pred)>; + +multiclass MVE_vector_store { + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; +} + +class MVE_vector_load_typed + : Pat<(Ty (LoadKind t2addrmode_imm7:$addr)), + (Ty (RegImmInst t2addrmode_imm7:$addr))>; + +class MVE_vector_maskedload_typed + : Pat<(Ty (LoadKind t2addrmode_imm7:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))), + (Ty (RegImmInst t2addrmode_imm7:$addr, ARMVCCThen, VCCR:$pred))>; + +multiclass MVE_vector_load { + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : 
MVE_vector_load_typed; +} + +class MVE_vector_offset_store_typed + : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; + +class MVE_vector_offset_maskedstore_typed + : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr, VCCR:$pred), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr, ARMVCCThen, VCCR:$pred)>; + +multiclass MVE_vector_offset_store { + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; +} + let Predicates = [HasMVEInt, IsLE] in { // Stores @@ -5510,116 +5664,73 @@ let Predicates = [HasMVEInt, IsBE] in { let Predicates = [HasMVEInt] in { // Aligned masked store, shared between LE and BE - def : MVE_vector_maskedstore_typed; - def : MVE_vector_maskedstore_typed; - def : MVE_vector_maskedstore_typed; - def : MVE_vector_maskedstore_typed; - def : MVE_vector_maskedstore_typed; - // Truncating stores - def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), - (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), - (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred), - (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + + // Pre/Post inc masked stores + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + // Aligned masked loads - def : MVE_vector_maskedload_typed; - def : MVE_vector_maskedload_typed; - def : MVE_vector_maskedload_typed; - def : MVE_vector_maskedload_typed; - def : MVE_vector_maskedload_typed; - // Extending masked loads. 
- def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v8i16 NEONimmAllZerosV))), - (v8i16 (MVE_VLDRBS16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRBS32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v8i16 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v8i16 NEONimmAllZerosV))), - (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v8i16 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v8i16 NEONimmAllZerosV))), - (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (sextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRHS32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (zextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (extmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; } // Widening/Narrowing Loads/Stores -let MinAlignment = 2 in { - def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr), - (truncstorevi16 node:$val, node:$ptr)>; - def post_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), - (post_truncstvi16 node:$val, node:$base, node:$offset)>; - def pre_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), - (pre_truncstvi16 node:$val, node:$base, node:$offset)>; -} - -let Predicates = [HasMVEInt] in { - def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr), - (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>; - def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr), - (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>; - def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr), - (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>; - - def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), - (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), - (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(post_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), - (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; - - def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), - (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), - (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), - (MVE_VSTRH32_pre MQPR:$Rt, 
tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; -} - - -let MinAlignment = 2 in { - def extloadvi16_align2 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>; - def sextloadvi16_align2 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>; - def zextloadvi16_align2 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>; -} - -multiclass MVEExtLoad { - def _Any : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - (!cast("extloadvi" # SrcElemBits # Align) am:$addr)), - (!cast("MVE_VLDR" # SrcElemType # "U" # DestElemBits) - am:$addr)>; - def _Z : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - (!cast("zextloadvi" # SrcElemBits # Align) am:$addr)), - (!cast("MVE_VLDR" # SrcElemType # "U" # DestElemBits) - am:$addr)>; - def _S : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - (!cast("sextloadvi" # SrcElemBits # Align) am:$addr)), - (!cast("MVE_VLDR" # SrcElemType # "S" # DestElemBits) - am:$addr)>; +multiclass MVEExtLoadStore { + // Trunc stores + def : Pat<(!cast("aligned_truncst"#Amble) (VT MQPR:$val), taddrmode_imm7:$addr), + (!cast(StoreInst) MQPR:$val, taddrmode_imm7:$addr)>; + def : Pat<(!cast("aligned_post_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), + (!cast(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; + def : Pat<(!cast("aligned_pre_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), + (!cast(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; + + // Masked trunc stores + def : Pat<(!cast("aligned_truncmaskedst"#Amble) (VT MQPR:$val), taddrmode_imm7:$addr, VCCR:$pred), + (!cast(StoreInst) MQPR:$val, taddrmode_imm7:$addr, ARMVCCThen, VCCR:$pred)>; + def : Pat<(!cast("aligned_post_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr, VCCR:$pred), + (!cast(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr, ARMVCCThen, VCCR:$pred)>; + def : Pat<(!cast("aligned_pre_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr, VCCR:$pred), + (!cast(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr, ARMVCCThen, VCCR:$pred)>; + + // Ext loads + def : Pat<(VT (!cast("aligned_extload"#Amble) taddrmode_imm7:$addr)), + (VT (LoadUInst taddrmode_imm7:$addr))>; + def : Pat<(VT (!cast("aligned_sextload"#Amble) taddrmode_imm7:$addr)), + (VT (LoadSInst taddrmode_imm7:$addr))>; + def : Pat<(VT (!cast("aligned_zextload"#Amble) taddrmode_imm7:$addr)), + (VT (LoadUInst taddrmode_imm7:$addr))>; + + // Masked ext loads + def : Pat<(VT (!cast("aligned_extmaskedload"#Amble) taddrmode_imm7:$addr, VCCR:$pred, (VT NEONimmAllZerosV))), + (VT (LoadUInst taddrmode_imm7:$addr, ARMVCCThen, VCCR:$pred))>; + def : Pat<(VT (!cast("aligned_sextmaskedload"#Amble) taddrmode_imm7:$addr, VCCR:$pred, (VT NEONimmAllZerosV))), + (VT (LoadSInst taddrmode_imm7:$addr, ARMVCCThen, VCCR:$pred))>; + def : Pat<(VT (!cast("aligned_zextmaskedload"#Amble) taddrmode_imm7:$addr, VCCR:$pred, (VT NEONimmAllZerosV))), + (VT (LoadUInst taddrmode_imm7:$addr, ARMVCCThen, VCCR:$pred))>; } let Predicates = [HasMVEInt] in { - defm : MVEExtLoad<"4", "32", "8", "B", "", taddrmode_imm7<0>>; - defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>; - defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>; + defm : MVEExtLoadStore; + defm : MVEExtLoadStore; + defm : MVEExtLoadStore; } diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 94bb45bde5739..6244d8d9e27e5 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -3314,30 +3314,30 @@ class 
N2VCvtQ op11_8, bit op7, bit op4, // source operand element sizes of 8, 16 and 32 bits: multiclass N2V_QHS_cmp op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, string opc, string Dt, - string asm, int fc> { + string asm, PatFrag fc> { // 64-bit vector types. def v8i8 : N2V; + [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>; def v4i16 : N2V; + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>; def v2i32 : N2V; + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>; def v2f32 : N2V { + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> { let Inst{10} = 1; // overwrite F = 1 } def v4f16 : N2V, + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } @@ -3346,25 +3346,25 @@ multiclass N2V_QHS_cmp op24_23, bits<2> op21_20, bits<2> op17_16, def v16i8 : N2V; + [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>; def v8i16 : N2V; + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>; def v4i32 : N2V; + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>; def v4f32 : N2V { + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> { let Inst{10} = 1; // overwrite F = 1 } def v8f16 : N2V, + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } @@ -3373,11 +3373,11 @@ multiclass N2V_QHS_cmp op24_23, bits<2> op21_20, bits<2> op17_16, // Neon 3-register comparisons. class N3VQ_cmp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable> : N3V { + [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> { // All of these have a two-operand InstAlias. let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; @@ -3385,11 +3385,11 @@ class N3VQ_cmp op21_20, bits<4> op11_8, bit op4, class N3VD_cmp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable> : N3V { + [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> { // All of these have a two-operand InstAlias. let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; @@ -3399,7 +3399,7 @@ multiclass N3V_QHS_cmp op11_8, bit op4, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - int fc, bit Commutable = 0> { + PatFrag fc, bit Commutable = 0> { // 64-bit vector types. 
def v8i8 : N3VD_cmp; + "vqadd", "s", saddsat, 1>; defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vqadd", "u", int_arm_neon_vqaddu, 1>; + "vqadd", "u", uaddsat, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) @@ -4527,22 +4527,22 @@ let Predicates = [HasNEON, HasV8_1a] in { defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqadds + def : Pat<(v4i16 (saddsat (v4i16 DPR:$src1), (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))))), (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v2i32 (int_arm_neon_vqadds + def : Pat<(v2i32 (saddsat (v2i32 DPR:$src1), (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))))), (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v8i16 (int_arm_neon_vqadds + def : Pat<(v8i16 (saddsat (v8i16 QPR:$src1), (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))))), (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; - def : Pat<(v4i32 (int_arm_neon_vqadds + def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))))), @@ -4551,7 +4551,7 @@ let Predicates = [HasNEON, HasV8_1a] in { defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqadds + def : Pat<(v4i16 (saddsat (v4i16 DPR:$src1), (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), @@ -4559,7 +4559,7 @@ let Predicates = [HasNEON, HasV8_1a] in { imm:$lane)))))), (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; - def : Pat<(v2i32 (int_arm_neon_vqadds + def : Pat<(v2i32 (saddsat (v2i32 DPR:$src1), (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), @@ -4567,7 +4567,7 @@ let Predicates = [HasNEON, HasV8_1a] in { imm:$lane)))))), (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane))>; - def : Pat<(v8i16 (int_arm_neon_vqadds + def : Pat<(v8i16 (saddsat (v8i16 QPR:$src1), (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src2), @@ -4579,7 +4579,7 @@ let Predicates = [HasNEON, HasV8_1a] in { QPR:$src3, (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; - def : Pat<(v4i32 (int_arm_neon_vqadds + def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src2), @@ -4597,22 +4597,22 @@ let Predicates = [HasNEON, HasV8_1a] in { defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqsubs + def : Pat<(v4i16 (ssubsat (v4i16 DPR:$src1), (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))))), (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v2i32 (int_arm_neon_vqsubs + def : Pat<(v2i32 (ssubsat (v2i32 DPR:$src1), (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))))), (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v8i16 (int_arm_neon_vqsubs + def : Pat<(v8i16 (ssubsat (v8i16 QPR:$src1), (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))))), (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; - def : Pat<(v4i32 (int_arm_neon_vqsubs + def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), (v4i32 
QPR:$Vm))))), @@ -4621,14 +4621,14 @@ let Predicates = [HasNEON, HasV8_1a] in { defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqsubs + def : Pat<(v4i16 (ssubsat (v4i16 DPR:$src1), (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), imm:$lane)))))), (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; - def : Pat<(v2i32 (int_arm_neon_vqsubs + def : Pat<(v2i32 (ssubsat (v2i32 DPR:$src1), (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), @@ -4636,7 +4636,7 @@ let Predicates = [HasNEON, HasV8_1a] in { imm:$lane)))))), (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane))>; - def : Pat<(v8i16 (int_arm_neon_vqsubs + def : Pat<(v8i16 (ssubsat (v8i16 QPR:$src1), (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src2), @@ -4648,7 +4648,7 @@ let Predicates = [HasNEON, HasV8_1a] in { QPR:$src3, (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; - def : Pat<(v4i32 (int_arm_neon_vqsubs + def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src2), @@ -4667,20 +4667,20 @@ defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; let Predicates = [HasNEON] in { -def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), +def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))))), (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), +def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1), (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))))), (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), +def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), imm:$lane)))))), (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; -def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), +def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1), (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), imm:$lane)))))), @@ -4759,20 +4759,20 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; let Predicates = [HasNEON] in { -def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), +def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))))), (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), +def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1), (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))))), (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), +def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), imm:$lane)))))), (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; -def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), +def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1), (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), imm:$lane)))))), @@ -5012,6 +5012,27 @@ defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>; defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>; defm VCMLA : N3VCP8ComplexTiedLane<0, 
"vcmla", null_frag>; +let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))), + (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>; + def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))), + (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>; + def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))), + (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>; + def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))), + (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>; +} +let Predicates = [HasNEON,HasV8_3a] in { + def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))), + (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>; + def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))), + (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>; + def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))), + (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>; + def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))), + (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>; +} + // Vector Subtract Operations. // VSUB : Vector Subtract (integer and floating-point) @@ -5045,10 +5066,10 @@ defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, // VQSUB : Vector Saturing Subtract defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vqsub", "s", int_arm_neon_vqsubs, 0>; + "vqsub", "s", ssubsat, 0>; defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vqsub", "u", int_arm_neon_vqsubu, 0>; + "vqsub", "u", usubsat, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) @@ -5068,66 +5089,66 @@ def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), // VCEQ : Vector Compare Equal defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vceq", "i", 0, 1>; + IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>; def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, - 0, 1>; + ARMCCeq, 1>; def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, - 0, 1>; + ARMCCeq, 1>; def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, - 0, 1>, + ARMCCeq, 1>, Requires<[HasNEON, HasFullFP16]>; def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, - 0, 1>, + ARMCCeq, 1>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$Vd, $Vm, #0", 0>; + "$Vd, $Vm, #0", ARMCCeq>; // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "s", 10, 0>; + IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>; defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "u", 2, 0>; + IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>; def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, - 10, 0>; + ARMCCge, 0>; def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, - 10, 0>; + ARMCCge, 0>; def VCGEhd : 
N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, - 10, 0>, + ARMCCge, 0>, Requires<[HasNEON, HasFullFP16]>; def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, - 10, 0>, + ARMCCge, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", - "$Vd, $Vm, #0", 10>; + "$Vd, $Vm, #0", ARMCCge>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", - "$Vd, $Vm, #0", 13>; + "$Vd, $Vm, #0", ARMCCle>; } // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "s", 12, 0>; + IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>; defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "u", 8, 0>; + IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>; def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, - 12, 0>; + ARMCCgt, 0>; def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, - 12, 0>; + ARMCCgt, 0>; def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, - 12, 0>, + ARMCCgt, 0>, Requires<[HasNEON, HasFullFP16]>; def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, - 12, 0>, + ARMCCgt, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", - "$Vd, $Vm, #0", 12>; + "$Vd, $Vm, #0", ARMCCgt>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", - "$Vd, $Vm, #0", 11>; + "$Vd, $Vm, #0", ARMCClt>; } // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index fdd961bfbb2f7..90be9a0333ed3 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -2279,6 +2279,12 @@ def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p), [(set (f32 SPR:$Sd), (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>, RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>; + +def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p), + IIC_fpUNA16, + [(set (f16 HPR:$Sd), + (ARMcmov HPR:$Sn, HPR:$Sm, cmovpred:$p))]>, + RegConstraint<"$Sd = $Sn">, Requires<[HasFPRegs]>; } // hasSideEffects //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 733a3f166069f..756d0fdb55702 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -25,6 +25,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/MC/MCInstrDesc.h" using namespace llvm; @@ -104,15 +106,45 @@ namespace { // Is it safe to define LR with DLS/WLS? // LR can be defined if it is the operand to start, because it's the same // value, or if it's going to be equivalent to the operand to Start. 
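// The rewrite below swaps the hand-rolled SearchForDef/SearchForUse scans
// for ReachingDefAnalysis queries (getReachingMIDef, hasSameReachingDef,
// isRegUsedAfter). A toy model of the core query over a single linear block
// is sketched here, assuming a made-up Instr type; it is not the LLVM
// interface, just the idea behind it:
#include <cassert>
#include <optional>
#include <set>
#include <vector>

struct Instr {
  std::set<int> Defs; // registers this instruction writes
};

// Index of the closest instruction before Pos that defines Reg, i.e. the
// reaching definition within one block (nullopt if Reg is live-in).
std::optional<size_t> reachingDef(const std::vector<Instr> &Block,
                                  size_t Pos, int Reg) {
  for (size_t I = Pos; I-- > 0;)
    if (Block[I].Defs.count(Reg))
      return I;
  return std::nullopt;
}

// Two points see the same value iff the same definition reaches both.
bool hasSameReachingDef(const std::vector<Instr> &Block, size_t A, size_t B,
                        int Reg) {
  return reachingDef(Block, A, Reg) == reachingDef(Block, B, Reg);
}

int main() {
  std::vector<Instr> Block = {{{1}}, {{2}}, {{}}, {{}}};
  assert(reachingDef(Block, 3, 1) == std::optional<size_t>(0));
  assert(hasSameReachingDef(Block, 2, 3, 2)); // both see the def at index 1
  return 0;
}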
- MachineInstr *IsSafeToDefineLR(); + MachineInstr *IsSafeToDefineLR(ReachingDefAnalysis *RDA); - // Check the branch targets are within range and we satisfy our restructi - void CheckLegality(ARMBasicBlockUtils *BBUtils); + // Check the branch targets are within range and we satisfy our + // restrictions. + void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA, + MachineLoopInfo *MLI); bool FoundAllComponents() const { return Start && Dec && End; } + // Return the loop iteration count, or the number of elements if we're tail + // predicating. + MachineOperand &getCount() { + return IsTailPredicationLegal() ? + VCTP->getOperand(1) : Start->getOperand(0); + } + + unsigned getStartOpcode() const { + bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart; + if (!IsTailPredicationLegal()) + return IsDo ? ARM::t2DLS : ARM::t2WLS; + + switch (VCTP->getOpcode()) { + default: + llvm_unreachable("unhandled vctp opcode"); + break; + case ARM::MVE_VCTP8: + return IsDo ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8; + case ARM::MVE_VCTP16: + return IsDo ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16; + case ARM::MVE_VCTP32: + return IsDo ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32; + case ARM::MVE_VCTP64: + return IsDo ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64; + } + return 0; + } + void dump() const { if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; @@ -127,6 +159,8 @@ namespace { class ARMLowOverheadLoops : public MachineFunctionPass { MachineFunction *MF = nullptr; + MachineLoopInfo *MLI = nullptr; + ReachingDefAnalysis *RDA = nullptr; const ARMBaseInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; std::unique_ptr BBUtils = nullptr; @@ -139,6 +173,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -146,7 +181,8 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); + MachineFunctionProperties::Property::NoVRegs).set( + MachineFunctionProperties::Property::TracksLiveness); } StringRef getPassName() const override { @@ -183,31 +219,6 @@ static bool IsLoopStart(MachineInstr &MI) { MI.getOpcode() == ARM::t2WhileLoopStart; } -template -static MachineInstr* SearchForDef(MachineInstr *Begin, T End, unsigned Reg) { - for(auto &MI : make_range(T(Begin), End)) { - for (auto &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) - continue; - return &MI; - } - } - return nullptr; -} - -static MachineInstr* SearchForUse(MachineInstr *Begin, - MachineBasicBlock::iterator End, - unsigned Reg) { - for(auto &MI : make_range(MachineBasicBlock::iterator(Begin), End)) { - for (auto &MO : MI.operands()) { - if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) - continue; - return &MI; - } - } - return nullptr; -} - static bool IsVCTP(MachineInstr *MI) { switch (MI->getOpcode()) { default: @@ -221,73 +232,42 @@ static bool IsVCTP(MachineInstr *MI) { return false; } -MachineInstr *LowOverheadLoop::IsSafeToDefineLR() { +MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) { + // We can define LR because LR already contains the same value. 
+ if (Start->getOperand(0).getReg() == ARM::LR) + return Start; - auto IsMoveLR = [](MachineInstr *MI, unsigned Reg) { + unsigned CountReg = Start->getOperand(0).getReg(); + auto IsMoveLR = [&CountReg](MachineInstr *MI) { return MI->getOpcode() == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::LR && - MI->getOperand(1).getReg() == Reg && + MI->getOperand(1).getReg() == CountReg && MI->getOperand(2).getImm() == ARMCC::AL; }; MachineBasicBlock *MBB = Start->getParent(); - unsigned CountReg = Start->getOperand(0).getReg(); - // Walk forward and backward in the block to find the closest instructions - // that define LR. Then also filter them out if they're not a mov lr. - MachineInstr *PredLRDef = SearchForDef(Start, MBB->rend(), ARM::LR); - if (PredLRDef && !IsMoveLR(PredLRDef, CountReg)) - PredLRDef = nullptr; - - MachineInstr *SuccLRDef = SearchForDef(Start, MBB->end(), ARM::LR); - if (SuccLRDef && !IsMoveLR(SuccLRDef, CountReg)) - SuccLRDef = nullptr; - - // We've either found one, two or none mov lr instructions... Now figure out - // if they are performing the equilvant mov that the Start instruction will. - // Do this by scanning forward and backward to see if there's a def of the - // register holding the count value. If we find a suitable def, return it as - // the insert point. Later, if InsertPt != Start, then we can remove the - // redundant instruction. - if (SuccLRDef) { - MachineBasicBlock::iterator End(SuccLRDef); - if (!SearchForDef(Start, End, CountReg)) { - return SuccLRDef; - } else - SuccLRDef = nullptr; - } - if (PredLRDef) { - MachineBasicBlock::reverse_iterator End(PredLRDef); - if (!SearchForDef(Start, End, CountReg)) { - return PredLRDef; - } else - PredLRDef = nullptr; - } - // We can define LR because LR already contains the same value. - if (Start->getOperand(0).getReg() == ARM::LR) - return Start; + // Find an insertion point: + // - Is there a (mov lr, Count) before Start? If so, and nothing else writes + // to Count before Start, we can insert at that mov. + // - Is there a (mov lr, Count) after Start? If so, and nothing else writes + // to Count after Start, we can insert at that mov. + if (auto *LRDef = RDA->getReachingMIDef(&MBB->back(), ARM::LR)) { + if (IsMoveLR(LRDef) && RDA->hasSameReachingDef(Start, LRDef, CountReg)) + return LRDef; + } // We've found no suitable LR def and Start doesn't use LR directly. Can we - // just define LR anyway? - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - LivePhysRegs LiveRegs(*TRI); - LiveRegs.addLiveOuts(*MBB); - - // Not if we've haven't found a suitable mov and LR is live out. - if (LiveRegs.contains(ARM::LR)) - return nullptr; - - // If LR is not live out, we can insert the instruction if nothing else - // uses LR after it. - if (!SearchForUse(Start, MBB->end(), ARM::LR)) + // just define LR anyway? + if (!RDA->isRegUsedAfter(Start, ARM::LR)) return Start; - LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for" - << " LR\n"); return nullptr; } -void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils) { +void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils, + ReachingDefAnalysis *RDA, + MachineLoopInfo *MLI) { if (Revert) return; @@ -320,18 +300,74 @@ void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils) { return; } - InsertPt = Revert ? nullptr : IsSafeToDefineLR(); + InsertPt = Revert ? 
nullptr : IsSafeToDefineLR(RDA); if (!InsertPt) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; + return; } else LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt); - LLVM_DEBUG(if (IsTailPredicationLegal()) { - dbgs() << "ARM Loops: Will use tail predication to convert:\n"; + // For tail predication, we need to provide the number of elements, instead + // of the iteration count, to the loop start instruction. The number of + // elements is provided to the vctp instruction, so we need to check that + // we can use this register at InsertPt. + if (!IsTailPredicationLegal()) + return; + + Register NumElements = VCTP->getOperand(1).getReg(); + + // If the register is defined within loop, then we can't perform TP. + // TODO: Check whether this is just a mov of a register that would be + // available. + if (RDA->getReachingDef(VCTP, NumElements) >= 0) { + CannotTailPredicate = true; + return; + } + + // We can't perform TP if the register does not hold the same value at + // InsertPt as the liveout value. + MachineBasicBlock *InsertBB = InsertPt->getParent(); + if (!RDA->hasSameReachingDef(InsertPt, &InsertBB->back(), + NumElements)) { + CannotTailPredicate = true; + return; + } + + // Especially in the case of while loops, InsertBB may not be the + // preheader, so we need to check that the register isn't redefined + // before entering the loop. + auto CannotProvideElements = [&RDA](MachineBasicBlock *MBB, + Register NumElements) { + // NumElements is redefined in this block. + if (RDA->getReachingDef(&MBB->back(), NumElements) >= 0) + return true; + + // Don't continue searching up through multiple predecessors. + if (MBB->pred_size() > 1) + return true; + + return false; + }; + + // First, find the block that looks like the preheader. + MachineBasicBlock *MBB = MLI->findLoopPreheader(ML, true); + if (!MBB) { + CannotTailPredicate = true; + return; + } + + // Then search backwards for a def, until we get to InsertBB. 
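// The while loop that follows relies on there being a unique chain of
// predecessors from the preheader-like block down to InsertBB, which is why
// CannotProvideElements gives up as soon as a block has more than one
// predecessor. A toy version of that backwards chain walk, with made-up
// types standing in for MachineBasicBlock:
#include <vector>

struct Block {
  std::vector<Block *> Preds;
  bool ClobbersCount = false; // does this block redefine the element count?
};

// Walk single-predecessor links from From back to To; fail if the count
// register is clobbered on the way or the chain forks.
bool chainIsClean(Block *From, Block *To) {
  while (From != To) {
    if (From->ClobbersCount || From->Preds.size() != 1)
      return false;
    From = From->Preds.front();
  }
  return true;
}

int main() {
  Block A, B, C;
  B.Preds = {&A};
  C.Preds = {&B};
  return chainIsClean(&C, &A) ? 0 : 1; // C -> B -> A, nothing clobbered
}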
+ while (MBB != InsertBB) { + CannotTailPredicate = CannotProvideElements(MBB, NumElements); + if (CannotTailPredicate) + return; + MBB = *MBB->pred_begin(); + } + + LLVM_DEBUG(dbgs() << "ARM Loops: Will use tail predication to convert:\n"; for (auto *MI : VPTUsers) - dbgs() << " - " << *MI; - }); + dbgs() << " - " << *MI;); } bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { @@ -342,7 +378,8 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { MF = &mf; LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"); - auto &MLI = getAnalysis(); + MLI = &getAnalysis(); + RDA = &getAnalysis(); MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); MRI = &MF->getRegInfo(); TII = static_cast(ST.getInstrInfo()); @@ -351,7 +388,7 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { BBUtils->adjustBBOffsetsAfter(&MF->front()); bool Changed = false; - for (auto ML : MLI) { + for (auto ML : *MLI) { if (!ML->getParentLoop()) Changed |= ProcessLoop(ML); } @@ -367,7 +404,14 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { for (auto I = ML->begin(), E = ML->end(); I != E; ++I) Changed |= ProcessLoop(*I); - LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML); + LLVM_DEBUG(dbgs() << "ARM Loops: Processing loop containing:\n"; + if (auto *Preheader = ML->getLoopPreheader()) + dbgs() << " - " << Preheader->getName() << "\n"; + else if (auto *Preheader = MLI->findLoopPreheader(ML)) + dbgs() << " - " << Preheader->getName() << "\n"; + for (auto *MBB : ML->getBlocks()) + dbgs() << " - " << MBB->getName() << "\n"; + ); // Search the given block for a loop start instruction. If one isn't found, // and there's only one predecessor block, search that one too. @@ -383,28 +427,15 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { }; LowOverheadLoop LoLoop(ML); - // Search the preheader for the start intrinsic, or look through the - // predecessors of the header to find exactly one set.iterations intrinsic. + // Search the preheader for the start intrinsic. // FIXME: I don't see why we shouldn't be supporting multiple predecessors // with potentially multiple set.loop.iterations, so we need to enable this. if (auto *Preheader = ML->getLoopPreheader()) LoLoop.Start = SearchForStart(Preheader); - else { - LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n" - << " - Performing manual predecessor search.\n"); - MachineBasicBlock *Pred = nullptr; - for (auto *MBB : ML->getHeader()->predecessors()) { - if (!ML->contains(MBB)) { - if (Pred) { - LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n"); - LoLoop.Start = nullptr; - break; - } - Pred = MBB; - LoLoop.Start = SearchForStart(MBB); - } - } - } + else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) + LoLoop.Start = SearchForStart(Preheader); + else + return false; // Find the low-overhead loop components and decide whether or not to fall // back to a normal loop. 
Also look for a vctp instructions and decide @@ -462,7 +493,7 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { if (!LoLoop.FoundAllComponents()) return false; - LoLoop.CheckLegality(BBUtils.get()); + LoLoop.CheckLegality(BBUtils.get(), RDA, MLI); Expand(LoLoop); return true; } @@ -493,19 +524,15 @@ void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const { } bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI, - bool AllowFlags) const { + bool SetFlags) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI); MachineBasicBlock *MBB = MI->getParent(); - // If nothing uses or defines CPSR between LoopDec and LoopEnd, use a t2SUBS. - bool SetFlags = false; - if (AllowFlags) { - if (auto *Def = SearchForDef(MI, MBB->end(), ARM::CPSR)) { - if (!SearchForUse(MI, MBB->end(), ARM::CPSR) && - Def->getOpcode() == ARM::t2LoopEnd) - SetFlags = true; - } - } + // If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS. + if (SetFlags && + (RDA->isRegUsedAfter(MI, ARM::CPSR) || + !RDA->hasSameReachingDef(MI, &MBB->back(), ARM::CPSR))) + SetFlags = false; MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); @@ -558,35 +585,45 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { MachineInstr *Start = LoLoop.Start; MachineBasicBlock *MBB = InsertPt->getParent(); bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart; - unsigned Opc = 0; - - if (!LoLoop.IsTailPredicationLegal()) - Opc = IsDo ? ARM::t2DLS : ARM::t2WLS; - else { - switch (LoLoop.VCTP->getOpcode()) { - case ARM::MVE_VCTP8: - Opc = IsDo ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8; - break; - case ARM::MVE_VCTP16: - Opc = IsDo ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16; - break; - case ARM::MVE_VCTP32: - Opc = IsDo ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32; - break; - case ARM::MVE_VCTP64: - Opc = IsDo ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64; - break; - } - } + unsigned Opc = LoLoop.getStartOpcode(); + MachineOperand &Count = LoLoop.getCount(); MachineInstrBuilder MIB = BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc)); MIB.addDef(ARM::LR); - MIB.add(Start->getOperand(0)); + MIB.add(Count); if (!IsDo) MIB.add(Start->getOperand(1)); + // When using tail-predication, try to delete the dead code that was used to + // calculate the number of loop iterations. + if (LoLoop.IsTailPredicationLegal()) { + SmallVector Killed; + SmallVector Dead; + if (auto *Def = RDA->getReachingMIDef(Start, + Start->getOperand(0).getReg())) { + Killed.push_back(Def); + + while (!Killed.empty()) { + MachineInstr *Def = Killed.back(); + Killed.pop_back(); + Dead.push_back(Def); + for (auto &MO : Def->operands()) { + if (!MO.isReg() || !MO.isKill()) + continue; + + MachineInstr *Kill = RDA->getReachingMIDef(Def, MO.getReg()); + if (Kill && RDA->getNumUses(Kill, MO.getReg()) == 1) + Killed.push_back(Kill); + } + } + for (auto *MI : Dead) + MI->eraseFromParent(); + } + } + + // If we're inserting at a mov lr, then remove it as it's redundant. 
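// The clean-up just above walks back through kill flags, collecting the
// chain of single-use instructions that only existed to compute the scalar
// iteration count, so they can be erased once the tail-predicated loop
// consumes the element count instead. The same worklist idea on a made-up
// toy IR (Node is illustrative, not a MachineInstr):
#include <vector>

struct Node {
  std::vector<Node *> KilledInputs; // defs whose only use is this node
  bool Dead = false;
};

// Mark Root and every single-use def feeding it as dead, depth-first.
void markDeadChain(Node *Root) {
  std::vector<Node *> Worklist{Root};
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    N->Dead = true;
    for (Node *In : N->KilledInputs)
      if (!In->Dead)
        Worklist.push_back(In);
  }
}

int main() {
  Node Mul, Add; // Add is the count computation's final step
  Add.KilledInputs = {&Mul};
  markDeadChain(&Add);
  return (Add.Dead && Mul.Dead) ? 0 : 1;
}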
if (InsertPt != Start) InsertPt->eraseFromParent(); Start->eraseFromParent(); diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 7a57376a68953..eb4d39b01cbbf 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -98,9 +98,8 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, const ARMBaseTargetMachine &TM, bool IsLittle, bool MinSize) : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps), - ReservedGPRegisters(ARM::GPRRegClass.getNumRegs()), CPUString(CPU), - OptMinSize(MinSize), IsLittle(IsLittle), TargetTriple(TT), - Options(TM.Options), TM(TM), + CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle), + TargetTriple(TT), Options(TM.Options), TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. @@ -254,18 +253,8 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { (Options.UnsafeFPMath || isTargetDarwin())) UseNEONForSinglePrecisionFP = true; - if (isRWPI() || (isTargetMachO() && !HasV6Ops)) - ReservedGPRegisters.set(9); - - // Throw an error when trying to reserve a target's FP register. It may - // be used by the compiler even when frame pointer elimination is enabled. - // FIXME: Throw this error if -frame-pointer=none is not set; otherwise - // only emit a warning. - const int restFP = (useR7AsFramePointer()) ? 7 : 11; - if (isGPRegisterReserved(restFP)) - report_fatal_error( - "Register r" + std::to_string(restFP) + - " has been specified but is used as the frame pointer for this target."); + if (isRWPI()) + ReserveR9 = true; // If MVEVectorCostFactor is still 0 (has not been set to anything else), default it to 2 if (MVEVectorCostFactor == 0) diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index c5836a3eca7b7..f582a92f65639 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -229,8 +229,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// NoARM - True if subtarget does not support ARM mode execution. bool NoARM = false; - // ReservedGPRegisters[i] - R#i is not available as a general purpose register - BitVector ReservedGPRegisters; + /// ReserveR9 - True if R9 is not available as a general purpose register. + bool ReserveR9 = false; /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of /// 32-bit imms (including global addresses). @@ -763,9 +763,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool isAClass() const { return ARMProcClass == AClass; } bool isReadTPHard() const { return ReadTPHard; } - bool isGPRegisterReserved(size_t i) const { return ReservedGPRegisters[i]; } - unsigned getNumGPRegistersReserved() const { - return ReservedGPRegisters.count(); + bool isR9Reserved() const { + return isTargetMachO() ? 
(ReserveR9 || !HasV6Ops) : ReserveR9; } bool useR7AsFramePointer() const { diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 10f68542e7e1e..018ce3903c2d7 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -91,7 +91,6 @@ extern "C" void LLVMInitializeARMTarget() { initializeARMLoadStoreOptPass(Registry); initializeARMPreAllocLoadStoreOptPass(Registry); initializeARMParallelDSPPass(Registry); - initializeARMCodeGenPreparePass(Registry); initializeARMConstantIslandsPass(Registry); initializeARMExecutionDomainFixPass(Registry); initializeARMExpandPseudoPass(Registry); @@ -422,7 +421,7 @@ void ARMPassConfig::addIRPasses() { void ARMPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None) - addPass(createARMCodeGenPreparePass()); + addPass(createTypePromotionPass()); TargetPassConfig::addCodeGenPrepare(); } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index c1fd01d2df9d5..5bb3bcaf10e77 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -76,9 +76,7 @@ class ARMTTIImpl : public BasicTTIImplBase { ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls, - ARM::FeatureExecuteOnly, ARM::FeatureReserveR6, ARM::FeatureReserveR7, - ARM::FeatureReserveR8, ARM::FeatureReserveR9, ARM::FeatureReserveR10, - ARM::FeatureReserveR11, ARM::FeatureNoMovt, + ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates }; diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt index 5ce28f29defbc..b94a78ea9404f 100644 --- a/llvm/lib/Target/ARM/CMakeLists.txt +++ b/llvm/lib/Target/ARM/CMakeLists.txt @@ -25,7 +25,6 @@ add_llvm_target(ARMCodeGen ARMBasicBlockInfo.cpp ARMCallingConv.cpp ARMCallLowering.cpp - ARMCodeGenPrepare.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp index 397f900447700..e8bc43dbe2ddb 100644 --- a/llvm/lib/Target/ARM/MVETailPredication.cpp +++ b/llvm/lib/Target/ARM/MVETailPredication.cpp @@ -485,10 +485,15 @@ bool MVETailPredication::TryConvert(Value *TripCount) { switch (VecTy->getNumElements()) { default: llvm_unreachable("unexpected number of lanes"); - case 2: VCTPID = Intrinsic::arm_vctp64; break; - case 4: VCTPID = Intrinsic::arm_vctp32; break; - case 8: VCTPID = Intrinsic::arm_vctp16; break; - case 16: VCTPID = Intrinsic::arm_vctp8; break; + case 4: VCTPID = Intrinsic::arm_mve_vctp32; break; + case 8: VCTPID = Intrinsic::arm_mve_vctp16; break; + case 16: VCTPID = Intrinsic::arm_mve_vctp8; break; + + // FIXME: vctp64 currently not supported because the predicate + // vector wants to be <2 x i1>, but v2i1 is not a legal MVE + // type, so problems happen at isel time. + // Intrinsic::arm_mve_vctp64 exists for ACLE intrinsics + // purposes, but takes a v4i1 instead of a v2i1. 
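// For context on the vctp mapping above: arm_mve_vctp<width> produces a
// lane predicate that is true for the first `elements` lanes and false for
// the rest, which is what lets the final, partial loop iteration run under
// predication. A scalar model of the 32-bit variant, as a sketch only:
#include <array>
#include <cassert>
#include <cstdint>

// vctp32 over a 128-bit vector: 4 lanes of 32-bit elements. Counts of 4 or
// more enable every lane.
std::array<bool, 4> vctp32(uint32_t Elements) {
  std::array<bool, 4> Mask{};
  for (uint32_t Lane = 0; Lane < 4; ++Lane)
    Mask[Lane] = Lane < Elements;
  return Mask;
}

int main() {
  auto M = vctp32(3); // last iteration with 3 elements remaining
  assert(M[0] && M[1] && M[2] && !M[3]);
  return 0;
}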
} Function *VCTP = Intrinsic::getDeclaration(M, VCTPID); Value *TailPredicate = Builder.CreateCall(VCTP, Processed); diff --git a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h index aa3aca359cb8d..11cb1a162e2ba 100644 --- a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h +++ b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h @@ -64,6 +64,25 @@ inline static CondCodes getOppositeCondition(CondCodes CC) { case LE: return GT; } } + +/// getSwappedCondition - assume the flags are set by MI(a,b), return +/// the condition code if we modify the instructions such that flags are +/// set by MI(b,a). +inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { + switch (CC) { + default: return ARMCC::AL; + case ARMCC::EQ: return ARMCC::EQ; + case ARMCC::NE: return ARMCC::NE; + case ARMCC::HS: return ARMCC::LS; + case ARMCC::LO: return ARMCC::HI; + case ARMCC::HI: return ARMCC::LO; + case ARMCC::LS: return ARMCC::HS; + case ARMCC::GE: return ARMCC::LE; + case ARMCC::LT: return ARMCC::GT; + case ARMCC::GT: return ARMCC::LT; + case ARMCC::LE: return ARMCC::GE; + } +} } // end namespace ARMCC namespace ARMVCC { diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 3af29a2e698b3..a28816cc87b7d 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -829,9 +829,13 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call, RecordAlignment); } - // Access key is the type name + reloc type + patched imm + access string, + // Access key is the + // "llvm." + type name + ":" + reloc type + ":" + patched imm + "$" + + // access string, // uniquely identifying one relocation. - AccessKey = TypeName + ":" + std::to_string(InfoKind) + ":" + + // The prefix "llvm." indicates this is a temporary global, which should + // not be emitted to ELF file. + AccessKey = "llvm." + TypeName + ":" + std::to_string(InfoKind) + ":" + std::to_string(PatchImm) + "$" + AccessKey; return Base; diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 7f52812179534..639ee2df96a9d 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -252,8 +252,10 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); - void expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, - const MCSubtargetInfo *STI, bool IsLoad); + void expandMem16Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad); + void expandMem9Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad); bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); @@ -1824,11 +1826,14 @@ static bool needsExpandMemInst(MCInst &Inst) { const MCOperandInfo &OpInfo = MCID.OpInfo[NumOp - 1]; if (OpInfo.OperandType != MCOI::OPERAND_MEMORY && - OpInfo.OperandType != MCOI::OPERAND_UNKNOWN) + OpInfo.OperandType != MCOI::OPERAND_UNKNOWN && + OpInfo.OperandType != MipsII::OPERAND_MEM_SIMM9) return false; MCOperand &Op = Inst.getOperand(NumOp - 1); if (Op.isImm()) { + if (OpInfo.OperandType == MipsII::OPERAND_MEM_SIMM9) + return !isInt<9>(Op.getImm()); // Offset can't exceed 16bit value. 
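// The simm9/simm16 checks above decide whether a memory offset fits the
// encoding directly; when it does not, the expansion materialises the high
// part with lui (plus an add of the base) and folds a sign-extended low
// part into the load/store itself. The arithmetic of that split, shown in
// isolation as a sketch (not the MipsAsmParser code path):
#include <cassert>
#include <cstdint>

struct HiLo { int32_t Hi; int16_t Lo; };

// Split Off so that (Hi << 16) + sign_extend(Lo) == Off. Subtracting the
// sign-extended Lo first compensates for the hardware sign-extending the
// 16-bit offset when it adds it (cf. the usual (Off + 0x8000) >> 16 form).
HiLo splitOffset(int32_t Off) {
  int16_t Lo = static_cast<int16_t>(Off & 0xffff);
  int32_t Hi = static_cast<int32_t>((static_cast<int64_t>(Off) - Lo) >> 16);
  return {Hi, Lo};
}

int main() {
  for (int32_t Off : {0x12345678, -4, 32768}) {
    HiLo S = splitOffset(Off);
    assert((static_cast<int64_t>(S.Hi) << 16) + S.Lo == Off);
  }
  return 0;
}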
return !isInt<16>(Op.getImm()); } @@ -2133,7 +2138,15 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, // Check the offset of memory operand, if it is a symbol // reference or immediate we may have to expand instructions. if (needsExpandMemInst(Inst)) { - expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + switch (MCID.OpInfo[MCID.getNumOperands() - 1].OperandType) { + case MipsII::OPERAND_MEM_SIMM9: + expandMem9Inst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + break; + default: + expandMem16Inst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + break; + } return getParser().hasPendingError(); } } @@ -3631,20 +3644,26 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return false; } -void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, - const MCSubtargetInfo *STI, bool IsLoad) { - const MCOperand &DstRegOp = Inst.getOperand(0); +void MipsAsmParser::expandMem16Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad) { + unsigned NumOp = Inst.getNumOperands(); + assert((NumOp == 3 || NumOp == 4) && "unexpected operands number"); + unsigned StartOp = NumOp == 3 ? 0 : 1; + + const MCOperand &DstRegOp = Inst.getOperand(StartOp); assert(DstRegOp.isReg() && "expected register operand kind"); - const MCOperand &BaseRegOp = Inst.getOperand(1); + const MCOperand &BaseRegOp = Inst.getOperand(StartOp + 1); assert(BaseRegOp.isReg() && "expected register operand kind"); + const MCOperand &OffsetOp = Inst.getOperand(StartOp + 2); MipsTargetStreamer &TOut = getTargetStreamer(); + unsigned OpCode = Inst.getOpcode(); unsigned DstReg = DstRegOp.getReg(); unsigned BaseReg = BaseRegOp.getReg(); unsigned TmpReg = DstReg; - const MCInstrDesc &Desc = getInstDesc(Inst.getOpcode()); - int16_t DstRegClass = Desc.OpInfo[0].RegClass; + const MCInstrDesc &Desc = getInstDesc(OpCode); + int16_t DstRegClass = Desc.OpInfo[StartOp].RegClass; unsigned DstRegClassID = getContext().getRegisterInfo()->getRegClass(DstRegClass).getID(); bool IsGPR = (DstRegClassID == Mips::GPR32RegClassID) || @@ -3658,25 +3677,12 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return; } - if (Inst.getNumOperands() > 3) { - const MCOperand &BaseRegOp = Inst.getOperand(2); - assert(BaseRegOp.isReg() && "expected register operand kind"); - const MCOperand &ExprOp = Inst.getOperand(3); - assert(ExprOp.isExpr() && "expected expression oprand kind"); - - unsigned BaseReg = BaseRegOp.getReg(); - const MCExpr *ExprOffset = ExprOp.getExpr(); - - MCOperand LoOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext())); - MCOperand HiOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext())); - TOut.emitSCWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand, - LoOperand, TmpReg, IDLoc, STI); - return; - } - - const MCOperand &OffsetOp = Inst.getOperand(2); + auto emitInstWithOffset = [&](const MCOperand &Off) { + if (NumOp == 3) + TOut.emitRRX(OpCode, DstReg, TmpReg, Off, IDLoc, STI); + else + TOut.emitRRRX(OpCode, DstReg, DstReg, TmpReg, Off, IDLoc, STI); + }; if (OffsetOp.isImm()) { int64_t LoOffset = OffsetOp.getImm() & 0xffff; @@ -3690,16 +3696,16 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, bool IsLargeOffset = HiOffset != 0; if (IsLargeOffset) { - bool Is32BitImm = (HiOffset >> 32) == 0; + bool Is32BitImm = 
isInt<32>(OffsetOp.getImm()); if (loadImmediate(HiOffset, TmpReg, Mips::NoRegister, Is32BitImm, true, IDLoc, Out, STI)) return; } if (BaseReg != Mips::ZERO && BaseReg != Mips::ZERO_64) - TOut.emitRRR(isGP64bit() ? Mips::DADDu : Mips::ADDu, TmpReg, TmpReg, - BaseReg, IDLoc, STI); - TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, LoOffset, IDLoc, STI); + TOut.emitRRR(ABI.ArePtrs64bit() ? Mips::DADDu : Mips::ADDu, TmpReg, + TmpReg, BaseReg, IDLoc, STI); + emitInstWithOffset(MCOperand::createImm(int16_t(LoOffset))); return; } @@ -3723,26 +3729,41 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, loadAndAddSymbolAddress(Res.getSymA(), TmpReg, BaseReg, !ABI.ArePtrs64bit(), IDLoc, Out, STI); - TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, Res.getConstant(), IDLoc, - STI); + emitInstWithOffset(MCOperand::createImm(int16_t(Res.getConstant()))); } else { // FIXME: Implement 64-bit case. // 1) lw $8, sym => lui $8, %hi(sym) // lw $8, %lo(sym)($8) // 2) sw $8, sym => lui $at, %hi(sym) // sw $8, %lo(sym)($at) - const MCExpr *ExprOffset = OffsetOp.getExpr(); + const MCExpr *OffExpr = OffsetOp.getExpr(); MCOperand LoOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext())); + MipsMCExpr::create(MipsMCExpr::MEK_LO, OffExpr, getContext())); MCOperand HiOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext())); - - // Generate the base address in TmpReg. - TOut.emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI); - if (BaseReg != Mips::ZERO) - TOut.emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); - // Emit the load or store with the adjusted base and offset. - TOut.emitRRX(Inst.getOpcode(), DstReg, TmpReg, LoOperand, IDLoc, STI); + MipsMCExpr::create(MipsMCExpr::MEK_HI, OffExpr, getContext())); + + if (ABI.IsN64()) { + MCOperand HighestOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_HIGHEST, OffExpr, getContext())); + MCOperand HigherOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_HIGHER, OffExpr, getContext())); + + TOut.emitRX(Mips::LUi, TmpReg, HighestOperand, IDLoc, STI); + TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, HigherOperand, IDLoc, STI); + TOut.emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, STI); + TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, HiOperand, IDLoc, STI); + TOut.emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, STI); + if (BaseReg != Mips::ZERO && BaseReg != Mips::ZERO_64) + TOut.emitRRR(Mips::DADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); + emitInstWithOffset(LoOperand); + } else { + // Generate the base address in TmpReg. + TOut.emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI); + if (BaseReg != Mips::ZERO) + TOut.emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); + // Emit the load or store with the adjusted base and offset. + emitInstWithOffset(LoOperand); + } } return; } @@ -3750,6 +3771,64 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, llvm_unreachable("unexpected operand type"); } +void MipsAsmParser::expandMem9Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad) { + unsigned NumOp = Inst.getNumOperands(); + assert((NumOp == 3 || NumOp == 4) && "unexpected operands number"); + unsigned StartOp = NumOp == 3 ? 
0 : 1; + + const MCOperand &DstRegOp = Inst.getOperand(StartOp); + assert(DstRegOp.isReg() && "expected register operand kind"); + const MCOperand &BaseRegOp = Inst.getOperand(StartOp + 1); + assert(BaseRegOp.isReg() && "expected register operand kind"); + const MCOperand &OffsetOp = Inst.getOperand(StartOp + 2); + + MipsTargetStreamer &TOut = getTargetStreamer(); + unsigned OpCode = Inst.getOpcode(); + unsigned DstReg = DstRegOp.getReg(); + unsigned BaseReg = BaseRegOp.getReg(); + unsigned TmpReg = DstReg; + + const MCInstrDesc &Desc = getInstDesc(OpCode); + int16_t DstRegClass = Desc.OpInfo[StartOp].RegClass; + unsigned DstRegClassID = + getContext().getRegisterInfo()->getRegClass(DstRegClass).getID(); + bool IsGPR = (DstRegClassID == Mips::GPR32RegClassID) || + (DstRegClassID == Mips::GPR64RegClassID); + + if (!IsLoad || !IsGPR || (BaseReg == DstReg)) { + // At this point we need AT to perform the expansions + // and we exit if it is not available. + TmpReg = getATReg(IDLoc); + if (!TmpReg) + return; + } + + auto emitInst = [&]() { + if (NumOp == 3) + TOut.emitRRX(OpCode, DstReg, TmpReg, MCOperand::createImm(0), IDLoc, STI); + else + TOut.emitRRRX(OpCode, DstReg, DstReg, TmpReg, MCOperand::createImm(0), + IDLoc, STI); + }; + + if (OffsetOp.isImm()) { + loadImmediate(OffsetOp.getImm(), TmpReg, BaseReg, !ABI.ArePtrs64bit(), true, + IDLoc, Out, STI); + emitInst(); + return; + } + + if (OffsetOp.isExpr()) { + loadAndAddSymbolAddress(OffsetOp.getExpr(), TmpReg, BaseReg, + !ABI.ArePtrs64bit(), IDLoc, Out, STI); + emitInst(); + return; + } + + llvm_unreachable("unexpected operand type"); +} + bool MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI) { diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index 3c11edfc3fc78..02ab5ede2c1a4 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -16,6 +16,7 @@ #include "MipsFixupKinds.h" #include "MipsMCTargetDesc.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" @@ -127,6 +128,12 @@ namespace MipsII { HasFCCRegOperand = 1 << 6 }; + + enum OperandType : unsigned { + OPERAND_FIRST_MIPS_MEM_IMM = MCOI::OPERAND_FIRST_TARGET, + OPERAND_MEM_SIMM9 = OPERAND_FIRST_MIPS_MEM_IMM, + OPERAND_LAST_MIPS_MEM_IMM = OPERAND_MEM_SIMM9 + }; } } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index d84e4eada6466..d0b3c204730fb 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -12,6 +12,7 @@ #include "MipsMCTargetDesc.h" #include "MipsAsmBackend.h" +#include "MipsBaseInfo.h" #include "MipsELFStreamer.h" #include "MipsInstPrinter.h" #include "MipsMCAsmInfo.h" diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp index 3ff9c722484bf..bdfb70aa98131 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp @@ -74,27 +74,23 @@ void MipsRegInfoRecord::SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo) { unsigned Value = 0; - for (MCSubRegIterator SubRegIt(Reg, MCRegInfo, true); SubRegIt.isValid(); - ++SubRegIt) { - unsigned CurrentSubReg = *SubRegIt; - - unsigned EncVal = 
MCRegInfo->getEncodingValue(CurrentSubReg); + for (const MCPhysReg &SubReg : MCRegInfo->subregs_inclusive(Reg)) { + unsigned EncVal = MCRegInfo->getEncodingValue(SubReg); Value |= 1 << EncVal; - if (GPR32RegClass->contains(CurrentSubReg) || - GPR64RegClass->contains(CurrentSubReg)) + if (GPR32RegClass->contains(SubReg) || GPR64RegClass->contains(SubReg)) ri_gprmask |= Value; - else if (COP0RegClass->contains(CurrentSubReg)) + else if (COP0RegClass->contains(SubReg)) ri_cprmask[0] |= Value; // MIPS COP1 is the FPU. - else if (FGR32RegClass->contains(CurrentSubReg) || - FGR64RegClass->contains(CurrentSubReg) || - AFGR64RegClass->contains(CurrentSubReg) || - MSA128BRegClass->contains(CurrentSubReg)) + else if (FGR32RegClass->contains(SubReg) || + FGR64RegClass->contains(SubReg) || + AFGR64RegClass->contains(SubReg) || + MSA128BRegClass->contains(SubReg)) ri_cprmask[1] |= Value; - else if (COP2RegClass->contains(CurrentSubReg)) + else if (COP2RegClass->contains(SubReg)) ri_cprmask[2] |= Value; - else if (COP3RegClass->contains(CurrentSubReg)) + else if (COP3RegClass->contains(SubReg)) ri_cprmask[3] |= Value; } } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index b6dae9f6dea82..054dc79f4aa91 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -34,11 +34,6 @@ static cl::opt RoundSectionSizes( cl::desc("Round section sizes up to the section alignment"), cl::Hidden); } // end anonymous namespace -static bool isMipsR6(const MCSubtargetInfo *STI) { - return STI->getFeatureBits()[Mips::FeatureMips32r6] || - STI->getFeatureBits()[Mips::FeatureMips64r6]; -} - static bool isMicroMips(const MCSubtargetInfo *STI) { return STI->getFeatureBits()[Mips::FeatureMicroMips]; } @@ -332,36 +327,6 @@ void MipsTargetStreamer::emitStoreWithImmOffset( emitRRI(Opcode, SrcReg, ATReg, LoOffset, IDLoc, STI); } -/// Emit a store instruction with an symbol offset. -void MipsTargetStreamer::emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg, - unsigned BaseReg, - MCOperand &HiOperand, - MCOperand &LoOperand, - unsigned ATReg, SMLoc IDLoc, - const MCSubtargetInfo *STI) { - // sc $8, sym => lui $at, %hi(sym) - // sc $8, %lo(sym)($at) - - // Generate the base address in ATReg. - emitRX(Mips::LUi, ATReg, HiOperand, IDLoc, STI); - if (!isMicroMips(STI) && isMipsR6(STI)) { - // For non-micromips r6 offset for 'sc' is not in the lower 16 bits so we - // put it in 'at'. - // sc $8, sym => lui $at, %hi(sym) - // addiu $at, $at, %lo(sym) - // sc $8, 0($at) - emitRRX(Mips::ADDiu, ATReg, ATReg, LoOperand, IDLoc, STI); - MCOperand Offset = MCOperand::createImm(0); - // Emit the store with the adjusted base and offset. - emitRRRX(Opcode, SrcReg, SrcReg, ATReg, Offset, IDLoc, STI); - } else { - if (BaseReg != Mips::ZERO) - emitRRR(Mips::ADDu, ATReg, ATReg, BaseReg, IDLoc, STI); - // Emit the store with the adjusted base and offset. - emitRRRX(Opcode, SrcReg, SrcReg, ATReg, LoOperand, IDLoc, STI); - } -} - /// Emit a load instruction with an immediate offset. DstReg and TmpReg are /// permitted to be the same register iff DstReg is distinct from BaseReg and /// DstReg is a GPR. 
It is the callers responsibility to identify such cases diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td index a735d45ddbfcf..9607d008bc979 100644 --- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td @@ -765,12 +765,12 @@ class LL_R6_DESC_BASE; +class LL_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9_exp, II_LL>; class SC_R6_DESC_BASE { dag OutOperandList = (outs GPROpnd:$dst); - dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr); + dag InOperandList = (ins GPROpnd:$rt, mem_simm9_exp:$addr); string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); list Pattern = []; bit mayStore = 1; diff --git a/llvm/lib/Target/Mips/Mips64r6InstrInfo.td b/llvm/lib/Target/Mips/Mips64r6InstrInfo.td index efebd77e531fe..33132d9ede92a 100644 --- a/llvm/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/llvm/lib/Target/Mips/Mips64r6InstrInfo.td @@ -75,7 +75,7 @@ class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd, II_DMUL, mul>; class DMULU_DESC : MUL_R6_DESC_BASE<"dmulu", GPR64Opnd, II_DMUL>; class LDPC_DESC : PCREL_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3, II_LDPC>; class LWUPC_DESC : PCREL_DESC_BASE<"lwupc", GPR32Opnd, simm19_lsl2, II_LWUPC>; -class LLD_R6_DESC : LL_R6_DESC_BASE<"lld", GPR64Opnd, mem_simmptr, II_LLD>; +class LLD_R6_DESC : LL_R6_DESC_BASE<"lld", GPR64Opnd, mem_simm9_exp, II_LLD>; class SCD_R6_DESC : SC_R6_DESC_BASE<"scd", GPR64Opnd, II_SCD>; class SELEQZ64_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR64Opnd>; class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>; @@ -106,7 +106,7 @@ class JIC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR64Opnd, list Defs = [AT]; } -class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9, II_LL>; +class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9_exp, II_LL>; class SC64_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd, II_SC>; class JR_HB64_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR64Opnd> { diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 353f10c8c64c1..be556cfba4dc3 100644 --- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -257,6 +257,10 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (emitPseudoExpansionLowering(*OutStreamer, &*I)) continue; + // Skip the BUNDLE pseudo instruction and lower the contents + if (I->isBundle()) + continue; + if (I->getOpcode() == Mips::PseudoReturn || I->getOpcode() == Mips::PseudoReturn64 || I->getOpcode() == Mips::PseudoIndirectBranch || diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp index 3c47e74de46d6..60d14933a2e03 100644 --- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -612,12 +612,18 @@ bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) { if (MipsCompactBranchPolicy.getValue() != CB_Always || !TII->getEquivalentCompactForm(I)) { if (searchBackward(MBB, *I)) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" + " in backwards search.\n"); Filled = true; } else if (I->isTerminator()) { if (searchSuccBBs(MBB, I)) { Filled = true; + LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" + " in successor BB search.\n"); } } else if (searchForward(MBB, I)) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" + " in forwards search.\n"); Filled = true; } } @@ -662,6 +668,8 @@ bool 
MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) { } // Bundle the NOP to the instruction with the delay slot. + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": could not fill delay slot for "; + I->dump()); BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); MIBundleBuilder(MBB, I, std::next(I, 2)); ++FilledSlots; @@ -679,13 +687,25 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin, for (IterTy I = Begin; I != End;) { IterTy CurrI = I; ++I; - + LLVM_DEBUG(dbgs() << DEBUG_TYPE ": checking instruction: "; CurrI->dump()); // skip debug value - if (CurrI->isDebugInstr()) + if (CurrI->isDebugInstr()) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring debug instruction: "; + CurrI->dump()); continue; + } + + if (CurrI->isBundle()) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring BUNDLE instruction: "; + CurrI->dump()); + continue; + } - if (terminateSearch(*CurrI)) + if (terminateSearch(*CurrI)) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE ": should terminate search: "; + CurrI->dump()); break; + } assert((!CurrI->isCall() && !CurrI->isReturn() && !CurrI->isBranch()) && "Cannot put calls, returns or branches in delay slot."); @@ -731,6 +751,9 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin, continue; Filler = CurrI; + LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot: "; + CurrI->dump()); + return true; } @@ -751,8 +774,11 @@ bool MipsDelaySlotFiller::searchBackward(MachineBasicBlock &MBB, MachineBasicBlock::iterator SlotI = Slot; if (!searchRange(MBB, ++SlotI.getReverse(), MBB.rend(), RegDU, MemDU, Slot, - Filler)) + Filler)) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay " + "slot using backwards search.\n"); return false; + } MBB.splice(std::next(SlotI), &MBB, Filler.getReverse()); MIBundleBuilder(MBB, SlotI, std::next(SlotI, 2)); @@ -772,8 +798,11 @@ bool MipsDelaySlotFiller::searchForward(MachineBasicBlock &MBB, RegDU.setCallerSaved(*Slot); - if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Slot, Filler)) + if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Slot, Filler)) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay " + "slot using forwards search.\n"); return false; + } MBB.splice(std::next(Slot), &MBB, Filler); MIBundleBuilder(MBB, Slot, std::next(Slot, 2)); diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td index da8be7c640b8b..3b626383d1d5a 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -1140,6 +1140,13 @@ def simm12 : Operand { let DecoderMethod = "DecodeSimm12"; } +def mem_simm9_exp : mem_generic { + let MIOperandInfo = (ops ptr_rc, simm9); + let ParserMatchClass = MipsMemSimmPtrAsmOperand; + let OperandNamespace = "MipsII"; + let OperandType = "OPERAND_MEM_SIMM9"; +} + foreach I = {9, 10, 11, 12, 16} in def mem_simm # I : mem_generic { let MIOperandInfo = (ops ptr_rc, !cast("simm" # I)); diff --git a/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/llvm/lib/Target/Mips/MipsMCInstLower.cpp index fd984058a2bf5..66e04bda2af32 100644 --- a/llvm/lib/Target/Mips/MipsMCInstLower.cpp +++ b/llvm/lib/Target/Mips/MipsMCInstLower.cpp @@ -34,7 +34,7 @@ void MipsMCInstLower::Initialize(MCContext *C) { MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, - unsigned Offset) const { + int64_t Offset) const { MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; MipsMCExpr::MipsExprKind TargetKind = 
MipsMCExpr::MEK_None; bool IsGpOff = false; @@ -161,9 +161,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); if (Offset) { - // Assume offset is never negative. - assert(Offset > 0); - + // Note: Offset can also be negative Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, *Ctx), *Ctx); } @@ -177,7 +175,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, } MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO, - unsigned offset) const { + int64_t offset) const { MachineOperandType MOTy = MO.getType(); switch (MOTy) { diff --git a/llvm/lib/Target/Mips/MipsMCInstLower.h b/llvm/lib/Target/Mips/MipsMCInstLower.h index 29af6f21de826..605a124bf1026 100644 --- a/llvm/lib/Target/Mips/MipsMCInstLower.h +++ b/llvm/lib/Target/Mips/MipsMCInstLower.h @@ -35,11 +35,11 @@ class LLVM_LIBRARY_VISIBILITY MipsMCInstLower { void Initialize(MCContext *C); void Lower(const MachineInstr *MI, MCInst &OutMI) const; - MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; + MCOperand LowerOperand(const MachineOperand &MO, int64_t offset = 0) const; private: MCOperand LowerSymbolOperand(const MachineOperand &MO, - MachineOperandType MOTy, unsigned Offset) const; + MachineOperandType MOTy, int64_t Offset) const; MCOperand createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2, MipsMCExpr::MipsExprKind Kind) const; void lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const; diff --git a/llvm/lib/Target/Mips/MipsTargetStreamer.h b/llvm/lib/Target/Mips/MipsTargetStreamer.h index 298d056ce2c35..b389ba8938c4b 100644 --- a/llvm/lib/Target/Mips/MipsTargetStreamer.h +++ b/llvm/lib/Target/Mips/MipsTargetStreamer.h @@ -156,10 +156,6 @@ class MipsTargetStreamer : public MCTargetStreamer { unsigned BaseReg, int64_t Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI); - void emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg, unsigned BaseReg, - MCOperand &HiOperand, MCOperand &LoOperand, - unsigned ATReg, SMLoc IDLoc, - const MCSubtargetInfo *STI); void emitLoadWithImmOffset(unsigned Opcode, unsigned DstReg, unsigned BaseReg, int64_t Offset, unsigned TmpReg, SMLoc IDLoc, const MCSubtargetInfo *STI); diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt index 28d7840d54124..1893d6e32c9ac 100644 --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -29,7 +29,7 @@ add_llvm_target(PowerPCCodeGen PPCEarlyReturn.cpp PPCFastISel.cpp PPCFrameLowering.cpp - PPCLoopPreIncPrep.cpp + PPCLoopInstrFormPrep.cpp PPCMCInstLower.cpp PPCMachineFunctionInfo.cpp PPCMachineScheduler.cpp diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 1216cd7272893..a61c34ca6f14b 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -87,4 +87,5 @@ PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) { assert(!IsLittleEndian && "Little-endian XCOFF not supported."); CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 
8 : 4; ZeroDirective = "\t.space\t"; + SymbolsHaveSMC = true; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index a9717bfc3082a..00df9e41fdae0 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -30,6 +30,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" @@ -108,8 +109,11 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer { : PPCTargetStreamer(S), OS(OS) {} void emitTCEntry(const MCSymbol &S) override { + const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo(); OS << "\t.tc "; - OS << S.getName(); + OS << (MAI->getSymbolsHaveSMC() + ? cast(S).getUnqualifiedName() + : S.getName()); OS << "[TC],"; OS << S.getName(); OS << '\n'; @@ -243,7 +247,10 @@ class PPCTargetXCOFFStreamer : public PPCTargetStreamer { PPCTargetXCOFFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} void emitTCEntry(const MCSymbol &S) override { - report_fatal_error("TOC entries not supported yet."); + const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo(); + const unsigned PointerSize = MAI->getCodePointerSize(); + Streamer.EmitValueToAlignment(PointerSize); + Streamer.EmitSymbolValue(&S, PointerSize); } void emitMachine(StringRef CPU) override { diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h index 5a830d2294116..a83509f0e6870 100644 --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -34,7 +34,7 @@ namespace llvm { #ifndef NDEBUG FunctionPass *createPPCCTRLoopsVerify(); #endif - FunctionPass *createPPCLoopPreIncPrepPass(PPCTargetMachine &TM); + FunctionPass *createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM); FunctionPass *createPPCTOCRegDepsPass(); FunctionPass *createPPCEarlyReturnPass(); FunctionPass *createPPCVSXCopyPass(); @@ -60,7 +60,7 @@ namespace llvm { #ifndef NDEBUG void initializePPCCTRLoopsVerifyPass(PassRegistry&); #endif - void initializePPCLoopPreIncPrepPass(PassRegistry&); + void initializePPCLoopInstrFormPrepPass(PassRegistry&); void initializePPCTOCRegDepsPass(PassRegistry&); void initializePPCEarlyReturnPass(PassRegistry&); void initializePPCVSXCopyPass(PassRegistry&); diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index de007d3b8d0b2..1d5396912ef08 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -51,6 +51,8 @@ def DirectivePwr6x def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">; def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">; def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">; +def DirectivePwrFuture + : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; @@ -209,36 +211,94 @@ def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units", // came before them, the idea is to make implementations of new processors // less error prone and easier to read. // Namely: -// list Power8FeatureList = ... 
-// list<SubtargetFeature> FutureProcessorSpecificFeatureList = -// [ features that Power8 does not support ] -// list<SubtargetFeature> FutureProcessorFeatureList = -// !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) +// list<SubtargetFeature> P8InheritableFeatures = ... +// list<SubtargetFeature> FutureProcessorAdditionalFeatures = +// [ features that Power8 does not support but are inheritable ] +// list<SubtargetFeature> FutureProcessorSpecificFeatures = +// [ features that Power8 does not support and are not inheritable ] +// list<SubtargetFeature> FutureProcessorInheritableFeatures = +// !listconcat(P8InheritableFeatures, FutureProcessorAdditionalFeatures) +// list<SubtargetFeature> FutureProcessorFeatures = +// !listconcat(FutureProcessorInheritableFeatures, +// FutureProcessorSpecificFeatures) // Makes it explicit and obvious what is new in FutureProcessor vs. Power8, as // well as providing a single point of definition if the feature set will be // used elsewhere. def ProcessorFeatures { - list<SubtargetFeature> Power7FeatureList = - [DirectivePwr7, FeatureAltivec, FeatureVSX, - FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, - FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, - FeatureBPERMD, FeatureExtDiv, - FeatureMFTB, DeprecatedDST, FeatureTwoConstNR]; - list<SubtargetFeature> Power8SpecificFeatures = - [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, - FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic]; - list<SubtargetFeature> Power8FeatureList = - !listconcat(Power7FeatureList, Power8SpecificFeatures); - list<SubtargetFeature> Power9SpecificFeatures = - [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0, - FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched]; - list<SubtargetFeature> Power9FeatureList = - !listconcat(Power8FeatureList, Power9SpecificFeatures); + // Power7 + list<SubtargetFeature> P7InheritableFeatures = [DirectivePwr7, + FeatureAltivec, + FeatureVSX, + FeatureMFOCRF, + FeatureFCPSGN, + FeatureFSqrt, + FeatureFRE, + FeatureFRES, + FeatureFRSQRTE, + FeatureFRSQRTES, + FeatureRecipPrec, + FeatureSTFIWX, + FeatureLFIWAX, + FeatureFPRND, + FeatureFPCVT, + FeatureISEL, + FeaturePOPCNTD, + FeatureCMPB, + FeatureLDBRX, + Feature64Bit, + /* Feature64BitRegs, */ + FeatureBPERMD, + FeatureExtDiv, + FeatureMFTB, + DeprecatedDST, + FeatureTwoConstNR]; + list<SubtargetFeature> P7SpecificFeatures = []; + list<SubtargetFeature> P7Features = + !listconcat(P7InheritableFeatures, P7SpecificFeatures); + + // Power8 + list<SubtargetFeature> P8AdditionalFeatures = [DirectivePwr8, + FeatureP8Altivec, + FeatureP8Vector, + FeatureP8Crypto, + FeatureHTM, + FeatureDirectMove, + FeatureICBT, + FeaturePartwordAtomic]; + list<SubtargetFeature> P8SpecificFeatures = []; + list<SubtargetFeature> P8InheritableFeatures = + !listconcat(P7InheritableFeatures, P8AdditionalFeatures); + list<SubtargetFeature> P8Features = + !listconcat(P8InheritableFeatures, P8SpecificFeatures); + + // Power9 + list<SubtargetFeature> P9AdditionalFeatures = [DirectivePwr9, + FeatureP9Altivec, + FeatureP9Vector, + FeatureISA3_0]; + // Some features are unique to Power9 and there is no reason to assume + // they will be part of any future CPUs. One example is the narrower + // dispatch for vector operations than scalar ones. For the time being, + // this list also includes scheduling-related features since we do not have + // enough info to create custom scheduling strategies for future CPUs.
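The naming scheme in the comment above composes feature lists by concatenation. A hedged illustration in plain C++ (standing in for TableGen's !listconcat; the feature names here are abbreviated and hypothetical):

```cpp
#include <string>
#include <vector>

using FeatureList = std::vector<std::string>;

// Mirrors !listconcat: a CPU's full feature set is its inheritable base
// plus its own non-inheritable extras; only the inheritable part is
// handed on to the next CPU in line.
static FeatureList concat(FeatureList A, const FeatureList &B) {
  A.insert(A.end(), B.begin(), B.end());
  return A;
}

int main() {
  const FeatureList P8Inheritable = {"p8-vector", "direct-move"};
  const FeatureList P9Additional = {"isa-v3.0"};            // inheritable
  const FeatureList P9Specific = {"vectors-use-two-units"}; // P9-only
  const FeatureList P9Inheritable = concat(P8Inheritable, P9Additional);
  const FeatureList P9Features = concat(P9Inheritable, P9Specific);
  // A "future" CPU starts from P9Inheritable, so the P9-specific
  // scheduling features never leak into it.
  const FeatureList FutureFeatures = concat(P9Inheritable, {});
  return P9Features.size() == 4 && FutureFeatures.size() == 3 ? 0 : 1;
}
```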
+ list P9SpecificFeatures = [FeatureVectorsUseTwoUnits, + FeaturePPCPreRASched, + FeaturePPCPostRASched]; + list P9InheritableFeatures = + !listconcat(P8InheritableFeatures, P9AdditionalFeatures); + list P9Features = + !listconcat(P9InheritableFeatures, P9SpecificFeatures); + + // Future + // For future CPU we assume that all of the existing features from Power 9 + // still exist with the exception of those we know are Power 9 specific. + list FutureAdditionalFeatures = []; + list FutureSpecificFeatures = []; + list FutureInheritableFeatures = + !listconcat(P9InheritableFeatures, FutureAdditionalFeatures); + list FutureFeatures = + !listconcat(FutureInheritableFeatures, FutureSpecificFeatures); } // Note: Future features to add when support is extended to more @@ -438,9 +498,12 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit, FeatureMFTB, DeprecatedDST]>; -def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; -def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; -def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>; +def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>; +def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>; +def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>; +// No scheduler model for future CPU. +def : ProcessorModel<"future", NoSchedModel, + ProcessorFeatures.FutureFeatures>; def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat, FeatureMFTB]>; def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat, @@ -451,7 +514,7 @@ def : ProcessorModel<"ppc64", G5Model, FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; -def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.Power8FeatureList>; +def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.P8Features>; //===----------------------------------------------------------------------===// // Calling Conventions diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 53dbb02bb8e4b..9b8fb4ddd311d 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -82,6 +83,8 @@ class PPCAsmPrinter : public AsmPrinter { const PPCSubtarget *Subtarget = nullptr; StackMaps SM; + virtual MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO); + public: explicit PPCAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) @@ -161,6 +164,11 @@ class PPCDarwinAsmPrinter : public PPCAsmPrinter { }; class PPCAIXAsmPrinter : public PPCAsmPrinter { +private: + static void ValidateGV(const GlobalVariable *GV); +protected: + MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO) override; + public: PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) : PPCAsmPrinter(TM, std::move(Streamer)) {} @@ -514,17 +522,16 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, /// Map a machine operand for a TOC pseudo-machine instruction to its /// corresponding MCSymbol. 
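The hunk that follows converts this helper from a file-static function taking the printer as a parameter into a virtual member, so the AIX printer can override the symbol lookup. A minimal hedged sketch of that refactoring pattern (hypothetical names, not the LLVM classes):

```cpp
#include <cstdio>

struct BasePrinter {
  virtual ~BasePrinter() = default;
  // Was conceptually: static const char *symbolFor(const BasePrinter &, int).
  virtual const char *symbolFor(int OperandKind) const {
    return OperandKind == 0 ? "plain-symbol" : "pool-symbol";
  }
  void emit(int OperandKind) const {
    std::printf("%s\n", symbolFor(OperandKind)); // now virtual dispatch
  }
};

struct AIXPrinter : BasePrinter {
  // AIX wants a csect-qualified name for some operand kinds.
  const char *symbolFor(int OperandKind) const override {
    return OperandKind == 0 ? "symbol[RW]"
                            : BasePrinter::symbolFor(OperandKind);
  }
};

int main() {
  AIXPrinter P;
  P.emit(0); // symbol[RW]
  P.emit(1); // pool-symbol
}
```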
-static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO, - AsmPrinter &AP) { +MCSymbol *PPCAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_GlobalAddress: - return AP.getSymbol(MO.getGlobal()); + return getSymbol(MO.getGlobal()); case MachineOperand::MO_ConstantPoolIndex: - return AP.GetCPISymbol(MO.getIndex()); + return GetCPISymbol(MO.getIndex()); case MachineOperand::MO_JumpTableIndex: - return AP.GetJTISymbol(MO.getIndex()); + return GetJTISymbol(MO.getIndex()); case MachineOperand::MO_BlockAddress: - return AP.GetBlockAddressSymbol(MO.getBlockAddress()); + return GetBlockAddressSymbol(MO.getBlockAddress()); default: llvm_unreachable("Unexpected operand type to get symbol."); } @@ -688,7 +695,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Invalid operand for LWZtoc."); // Map the operand to its corresponding MCSymbol. - const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO); // Create a reference to the GOT entry for the symbol. The GOT entry will be // synthesized later. @@ -749,7 +756,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // global address operand to be a reference to the TOC entry we will // synthesize later. MCSymbol *TOCEntry = - lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this)); + lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO)); const MCSymbolRefExpr::VariantKind VK = IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC; @@ -775,7 +782,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Invalid operand for ADDIStocHA."); // Map the machine operand to its corresponding MCSymbol. - MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); // Always use TOC on AIX. Map the global address operand to be a reference // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to @@ -805,7 +812,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Invalid operand for LWZtocL."); // Map the machine operand to its corresponding MCSymbol. - MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); // Always use TOC on AIX. Map the global address operand to be a reference // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to @@ -835,7 +842,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && "Invalid operand for ADDIStocHA8!"); - const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); const bool GlobalToc = MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal()); @@ -881,7 +888,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "LDtocL used on symbol that could be accessed directly is " "invalid. 
Must match ADDIStocHA8.")); - const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); @@ -911,7 +918,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Interposable definitions must use indirect access.")); const MCExpr *Exp = - MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this), + MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO), MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -1603,7 +1610,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { // FIXME: why is power8 missing here? "ppc64", "ppc64le", - "power9" + "power9", + "future" }; // Get the numerically largest directive. @@ -1735,7 +1743,7 @@ void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) { return AsmPrinter::SetupMachineFunction(MF); } -void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { +void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) { // Early error checking limiting what is supported. if (GV->isThreadLocal()) report_fatal_error("Thread local not yet supported on AIX."); @@ -1745,6 +1753,19 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GV->hasComdat()) report_fatal_error("COMDAT not yet supported by AIX."); +} + +void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + ValidateGV(GV); + + // External global variables are already handled. + if (!GV->hasInitializer()) + return; + + // Create the symbol, set its storage class. + MCSymbolXCOFF *GVSym = cast(getSymbol(GV)); + GVSym->setStorageClass( + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM); if ((!GVKind.isCommon() && !GVKind.isBSS() && !GVKind.isData() && @@ -1758,11 +1779,6 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { MCSectionXCOFF *Csect = cast( getObjFileLowering().SectionForGlobal(GV, GVKind, TM)); OutStreamer->SwitchSection(Csect); - - // Create the symbol, set its storage class, and emit it. - MCSymbolXCOFF *GVSym = cast(getSymbol(GV)); - GVSym->setStorageClass( - TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); GVSym->setContainingCsect(Csect); const DataLayout &DL = GV->getParent()->getDataLayout(); @@ -1801,7 +1817,10 @@ void PPCAIXAsmPrinter::EmitFunctionDescriptor() { OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext), PointerSize); // Emit TOC base address. - MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]")); + const MCSectionXCOFF *TOCBaseSec = OutStreamer->getContext().getXCOFFSection( + StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT, + SectionKind::getData()); + const MCSymbol *TOCBaseSym = TOCBaseSec->getQualNameSymbol(); OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext), PointerSize); // Emit a null environment pointer. @@ -1820,10 +1839,84 @@ void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) { MCSectionXCOFF *TOCBaseSection = OutStreamer->getContext().getXCOFFSection( StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData()); + // The TOC-base always has 0 size, but 4 byte alignment. + TOCBaseSection->setAlignment(Align(4)); // Switch to section to emit TOC base. 
OutStreamer->SwitchSection(TOCBaseSection); + + PPCTargetStreamer &TS = + static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer()); + + for (auto &I : TOC) { + // Set up the csect for the current TC entry. + MCSectionXCOFF *TCEntry = OutStreamer->getContext().getXCOFFSection( + cast<MCSymbolXCOFF>(I.first)->getUnqualifiedName(), XCOFF::XMC_TC, + XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData()); + cast<MCSymbolXCOFF>(I.second)->setContainingCsect(TCEntry); + OutStreamer->SwitchSection(TCEntry); + + OutStreamer->EmitLabel(I.second); + TS.emitTCEntry(*I.first); + } } +MCSymbol * +PPCAIXAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) { + const GlobalObject *GO = nullptr; + + // If the MO is a function or certain kinds of globals, we want to make sure to + // refer to the csect symbol, otherwise we can just do the default handling. + if (MO.getType() != MachineOperand::MO_GlobalAddress || + !(GO = dyn_cast<GlobalObject>(MO.getGlobal()))) + return PPCAsmPrinter::getMCSymbolForTOCPseudoMO(MO); + + // Do an early error check for globals we don't support. This will go away + // eventually. + const auto *GV = dyn_cast<GlobalVariable>(GO); + if (GV) { + ValidateGV(GV); + } + + MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(getSymbol(GO)); + + // If the global object is a global variable without an initializer or is a + // declaration of a function, then XSym is an externally referenced symbol. + // Hence we may need to explicitly create an MCSectionXCOFF for it so that we + // can return its symbol later. + if (GO->isDeclaration() && !XSym->hasContainingCsect()) { + // Make sure the storage class is set. + const XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); + XSym->setStorageClass(SC); + + MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection( + XSym->getName(), isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA, + XCOFF::XTY_ER, SC, SectionKind::getMetadata()); + XSym->setContainingCsect(Csect); + + return Csect->getQualNameSymbol(); + } + + // Handle initialized global variables. + if (GV) { + SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM); + + // If the operand is a common, then we should refer to the csect symbol. + if (GVKind.isCommon() || GVKind.isBSSLocal()) { + MCSectionXCOFF *Csect = cast<MCSectionXCOFF>( + getObjFileLowering().SectionForGlobal(GV, GVKind, TM)); + return Csect->getQualNameSymbol(); + } + + // Other global variables are referred to by labels inside of a single csect, + // so refer to the label directly. + return getSymbol(GV); + } + + // If the MO is a function, we want to make sure to refer to the function + // descriptor csect.
+ return XSym->getContainingCsect()->getQualNameSymbol(); +} /// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code /// for a MachineFunction to the given output stream, in a format that the diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f95f8be8a0481..a4f662dfdddb6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -52,6 +52,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallSite.h" @@ -1216,6 +1217,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: + case PPC::DIR_PWR_FUTURE: setPrefLoopAlignment(Align(16)); setPrefFunctionAlignment(Align(16)); break; @@ -3416,15 +3418,16 @@ SDValue PPCTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { + if (Subtarget.isAIXABI()) + return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); if (Subtarget.is64BitELFABI()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - else if (Subtarget.is32BitELFABI()) + if (Subtarget.is32BitELFABI()) return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - // FIXME: We are using this for both AIX and Darwin. We should add appropriate - // AIX testing, and rename it appropriately. return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } @@ -5326,16 +5329,19 @@ SDValue PPCTargetLowering::FinishCall( GlobalAddressSDNode *G = cast(Callee); auto &Context = DAG.getMachineFunction().getMMI().getContext(); + const GlobalObject *GO = cast(G->getGlobal()); MCSymbolXCOFF *S = cast(Context.getOrCreateSymbol( - Twine(".") + Twine(G->getGlobal()->getName()))); - - const GlobalValue *GV = G->getGlobal(); - if (GV && GV->isDeclaration() && !S->hasContainingCsect()) { - // On AIX, undefined symbol need to associate with a MCSectionXCOFF to - // get the correct storage mapping class. In this case, XCOFF::XMC_PR. + Twine(".") + Twine(GO->getName()))); + + if (GO && GO->isDeclaration() && !S->hasContainingCsect()) { + // On AIX, an undefined symbol needs to be associated with a + // MCSectionXCOFF to get the correct storage mapping class. + // In this case, XCOFF::XMC_PR. + const XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); MCSectionXCOFF *Sec = Context.getXCOFFSection(S->getName(), XCOFF::XMC_PR, XCOFF::XTY_ER, - XCOFF::C_EXT, SectionKind::getMetadata()); + SC, SectionKind::getMetadata()); S->setContainingCsect(Sec); } @@ -6803,6 +6809,117 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, } } +static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, + bool IsPPC64) { + assert((IsPPC64 || SVT != MVT::i64) && + "i64 should have been split for 32-bit codegen."); + + switch (SVT) { + default: + report_fatal_error("Unexpected value type for formal argument"); + case MVT::i1: + case MVT::i32: + case MVT::i64: + return IsPPC64 ? 
&PPC::G8RCRegClass : &PPC::GPRCRegClass; + case MVT::f32: + return &PPC::F4RCRegClass; + case MVT::f64: + return &PPC::F8RCRegClass; + } +} + +static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, + SelectionDAG &DAG, SDValue ArgValue, + MVT LocVT, const SDLoc &dl) { + assert(ValVT.isScalarInteger() && LocVT.isScalarInteger()); + assert(ValVT.getSizeInBits() < LocVT.getSizeInBits()); + + if (Flags.isSExt()) + ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue, + DAG.getValueType(ValVT)); + else if (Flags.isZExt()) + ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue, + DAG.getValueType(ValVT)); + + return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue); +} + +SDValue PPCTargetLowering::LowerFormalArguments_AIX( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { + + assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold || + CallConv == CallingConv::Fast) && + "Unexpected calling convention!"); + + if (isVarArg) + report_fatal_error("This call type is unimplemented on AIX."); + + if (getTargetMachine().Options.GuaranteedTailCallOpt) + report_fatal_error("Tail call support is unimplemented on AIX."); + + if (useSoftFloat()) + report_fatal_error("Soft float support is unimplemented on AIX."); + + const PPCSubtarget &Subtarget = + static_cast(DAG.getSubtarget()); + if (Subtarget.hasQPX()) + report_fatal_error("QPX support is not supported on AIX."); + + const bool IsPPC64 = Subtarget.isPPC64(); + const unsigned PtrByteSize = IsPPC64 ? 8 : 4; + + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + MachineFunction &MF = DAG.getMachineFunction(); + CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); + + // Reserve space for the linkage area on the stack. + const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + // On AIX a minimum of 8 words is saved to the parameter save area. + const unsigned MinParameterSaveArea = 8 * PtrByteSize; + CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize); + CCInfo.AnalyzeFormalArguments(Ins, CC_AIX); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue ArgValue; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (VA.isRegLoc()) { + EVT ValVT = VA.getValVT(); + MVT LocVT = VA.getLocVT(); + MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy; + unsigned VReg = + MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64)); + ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); + if (ValVT.isScalarInteger() && + (ValVT.getSizeInBits() < LocVT.getSizeInBits())) { + ArgValue = + truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl); + } + InVals.push_back(ArgValue); + } else { + report_fatal_error("Handling of formal arguments on the stack is " + "unimplemented!"); + } + } + + // Area that is at least reserved in the caller of this function. + unsigned MinReservedArea = CCInfo.getNextStackOffset(); + + // Set the size that is at least reserved in caller of this function. Tail + // call optimized function's reserved stack space needs to be aligned so + // that taking the difference between two stack areas will result in an + // aligned stack. 
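truncateScalarIntegerArg above follows the usual pattern for narrow integer arguments: the value arrives widened to register width, an AssertSext/AssertZext records what the ABI guarantees about the upper bits, and a TRUNCATE recovers the narrow type. A hedged plain-C++ model of that contract (not the SelectionDAG code):

```cpp
#include <cassert>
#include <cstdint>

// A 32-bit argument passed in a 64-bit register: depending on the ABI
// flag, the upper bits must be a sign extension or all zero.
int32_t truncateWidenedArg(int64_t Widened, bool IsSExt, bool IsZExt) {
  if (IsSExt)
    assert(Widened == (int64_t)(int32_t)Widened &&
           "upper bits are not a sign extension"); // models AssertSext
  else if (IsZExt)
    assert((uint64_t)Widened <= UINT32_MAX &&
           "upper bits are not zero");             // models AssertZext
  return (int32_t)Widened;                         // models TRUNCATE
}

int main() {
  return truncateWidenedArg(-5, /*IsSExt=*/true, /*IsZExt=*/false) == -5
             ? 0 : 1;
}
```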
+ MinReservedArea = + EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); + PPCFunctionInfo *FuncInfo = MF.getInfo(); + FuncInfo->setMinReservedArea(MinReservedArea); + + return Chain; +} + SDValue PPCTargetLowering::LowerCall_AIX( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, @@ -14200,7 +14317,8 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: - case PPC::DIR_PWR9: { + case PPC::DIR_PWR9: + case PPC::DIR_PWR_FUTURE: { if (!ML) break; @@ -15379,6 +15497,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { // vector 7 2 2 return true; case PPC::DIR_PWR9: + case PPC::DIR_PWR_FUTURE: // type mul add shl // scalar 5 2 2 // vector 7 2 2 diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 77b19b2634669..612d1c6b3f26e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1121,6 +1121,10 @@ namespace llvm { SelectionDAG &DAG, SDValue ArgVal, const SDLoc &dl) const; + SDValue LowerFormalArguments_AIX( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const; SDValue LowerFormalArguments_Darwin( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index e94ef4b1e505c..f5e2b473f1ee5 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -3757,8 +3757,10 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg(); unsigned Opc = MI.getOpcode(); - bool SpecialShift32 = - Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo; + bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLWo || + Opc == PPC::SRW || Opc == PPC::SRWo || + Opc == PPC::SLW8 || Opc == PPC::SLW8o || + Opc == PPC::SRW8 || Opc == PPC::SRW8o; bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLDo || Opc == PPC::SRD || Opc == PPC::SRDo; bool SetCR = Opc == PPC::SLWo || Opc == PPC::SRWo || diff --git a/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp similarity index 95% rename from llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp rename to llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index 72c347e005192..086db4ef9ec90 100644 --- a/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -1,4 +1,4 @@ -//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===// +//===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -41,7 +41,7 @@ // *++p = c; //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppc-loop-preinc-prep" +#define DEBUG_TYPE "ppc-loop-instr-form-prep" #include "PPC.h" #include "PPCSubtarget.h" @@ -148,16 +148,16 @@ namespace { // For DQ form instructions, their displacements must be multiple of 16. 
enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 }; - class PPCLoopPreIncPrep : public FunctionPass { + class PPCLoopInstrFormPrep : public FunctionPass { public: static char ID; // Pass ID, replacement for typeid - PPCLoopPreIncPrep() : FunctionPass(ID) { - initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); + PPCLoopInstrFormPrep() : FunctionPass(ID) { + initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry()); } - PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { - initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); + PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { + initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -237,20 +237,20 @@ namespace { } // end anonymous namespace -char PPCLoopPreIncPrep::ID = 0; -static const char *name = "Prepare loop for pre-inc. addressing modes"; -INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) +char PPCLoopInstrFormPrep::ID = 0; +static const char *name = "Prepare loop for ppc preferred instruction forms"; +INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) +INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false) static const std::string PHINodeNameSuffix = ".phi"; static const std::string CastNodeNameSuffix = ".cast"; static const std::string GEPNodeIncNameSuffix = ".inc"; static const std::string GEPNodeOffNameSuffix = ".off"; -FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { - return new PPCLoopPreIncPrep(TM); +FunctionPass *llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM) { + return new PPCLoopInstrFormPrep(TM); } static bool IsPtrInBounds(Value *BasePtr) { @@ -284,7 +284,7 @@ static Value *GetPointerOperand(Value *MemI) { return nullptr; } -bool PPCLoopPreIncPrep::runOnFunction(Function &F) { +bool PPCLoopInstrFormPrep::runOnFunction(Function &F) { if (skipFunction(F)) return false; @@ -305,7 +305,7 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { return MadeChange; } -void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, +void PPCLoopInstrFormPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, SmallVector &Buckets, unsigned MaxCandidateNum) { assert((MemI && GetPointerOperand(MemI)) && @@ -328,7 +328,7 @@ void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, } } -SmallVector PPCLoopPreIncPrep::collectCandidates( +SmallVector PPCLoopInstrFormPrep::collectCandidates( Loop *L, std::function isValidCandidate, unsigned MaxCandidateNum) { @@ -369,7 +369,7 @@ SmallVector PPCLoopPreIncPrep::collectCandidates( return Buckets; } -bool PPCLoopPreIncPrep::prepareBaseForDispFormChain(Bucket &BucketChain, +bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain, InstrForm Form) { // RemainderOffsetInfo details: // key: value of (Offset urem DispConstraint). For DSForm, it can @@ -444,7 +444,7 @@ bool PPCLoopPreIncPrep::prepareBaseForDispFormChain(Bucket &BucketChain, // {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp // for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a // multipler of 4, it cannot be represented by sint16. 
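The offset-rebasing pitfall described in the comment above reduces to a simple legality predicate: the displacement must fit in simm16 and be a multiple of the form's constraint, which is exactly the enumerator value in InstrForm. A hedged sketch:

```cpp
#include <cstdint>

enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 };

static bool isLegalDisp(int64_t Disp, InstrForm Form) {
  const bool FitsSimm16 = Disp >= -32768 && Disp <= 32767;
  return FitsSimm16 && Disp % static_cast<int64_t>(Form) == 0;
}

int main() {
  // From the comment above: rebasing {-32769, 2003, 2007, 2011} at -32769
  // yields displacements up to 34780 - multiples of 4, yet outside simm16,
  // so that base choice must be rejected for DS-form.
  const bool Rejected = !isLegalDisp(34780, DSForm); // true
  const bool Accepted = isLegalDisp(2004, DSForm);   // true
  return Rejected && Accepted ? 0 : 1;
}
```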
-bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { +bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { // We have a choice now of which instruction's memory operand we use as the // base for the generated PHI. Always picking the first instruction in each // bucket does not work well, specifically because that instruction might @@ -484,7 +484,7 @@ bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { return true; } -bool PPCLoopPreIncPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain, +bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain, SmallSet &BBChanged, InstrForm Form) { bool MadeChange = false; @@ -676,7 +676,7 @@ bool PPCLoopPreIncPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain, return MadeChange; } -bool PPCLoopPreIncPrep::updateFormPrep(Loop *L, +bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L, SmallVector &Buckets) { bool MadeChange = false; if (Buckets.empty()) @@ -695,7 +695,7 @@ bool PPCLoopPreIncPrep::updateFormPrep(Loop *L, return MadeChange; } -bool PPCLoopPreIncPrep::dispFormPrep(Loop *L, SmallVector &Buckets, +bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector &Buckets, InstrForm Form) { bool MadeChange = false; @@ -721,7 +721,7 @@ bool PPCLoopPreIncPrep::dispFormPrep(Loop *L, SmallVector &Buckets, // This function will check to see if that PHI already exists and will return // true if it found an existing PHI with the matched start and increment as the // one we wanted to create. -bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, +bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI, const SCEV *BasePtrStartSCEV, const SCEVConstant *BasePtrIncSCEV, InstrForm Form) { @@ -787,7 +787,7 @@ bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, return false; } -bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { +bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) { bool MadeChange = false; // Only prep. 
the inner-most loop diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index 6aa2fdcbec822..7eeff007b78fb 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -18,6 +18,7 @@ // //===---------------------------------------------------------------------===// +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCInstrBuilder.h" @@ -806,6 +807,143 @@ bool PPCMIPeephole::simplifyCode(void) { combineSEXTAndSHL(MI, ToErase); break; } + case PPC::RLWINM: + case PPC::RLWINMo: + case PPC::RLWINM8: + case PPC::RLWINM8o: { + unsigned FoldingReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(FoldingReg)) + break; + + MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg); + if (SrcMI->getOpcode() != PPC::RLWINM && + SrcMI->getOpcode() != PPC::RLWINMo && + SrcMI->getOpcode() != PPC::RLWINM8 && + SrcMI->getOpcode() != PPC::RLWINM8o) + break; + assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() && + MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() && + SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) && + "Invalid PPC::RLWINM Instruction!"); + uint64_t SHSrc = SrcMI->getOperand(2).getImm(); + uint64_t SHMI = MI.getOperand(2).getImm(); + uint64_t MBSrc = SrcMI->getOperand(3).getImm(); + uint64_t MBMI = MI.getOperand(3).getImm(); + uint64_t MESrc = SrcMI->getOperand(4).getImm(); + uint64_t MEMI = MI.getOperand(4).getImm(); + + assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) && + "Invalid PPC::RLWINM Instruction!"); + + // If MBMI is bigger than MEMI, we can never get a run of ones. + // RotatedSrcMask non-wrap: + // 0........31|32........63 + // RotatedSrcMask: B---E B---E + // MaskMI: -----------|--E B------ + // Result: ----- --- (Bad candidate) + // + // RotatedSrcMask wrap: + // 0........31|32........63 + // RotatedSrcMask: --E B----|--E B---- + // MaskMI: -----------|--E B------ + // Result: --- -----|--- ----- (Bad candidate) + // + // One special case is when RotatedSrcMask is a full set mask. + // RotatedSrcMask full: + // 0........31|32........63 + // RotatedSrcMask: ------EB---|-------EB--- + // MaskMI: -----------|--E B------ + // Result: -----------|--- ------- (Good candidate) + + // Mark special case. + bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31); + + // For other MBMI > MEMI cases, just return. + if ((MBMI > MEMI) && !SrcMaskFull) + break; + + // Handle MBMI <= MEMI cases. + APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI); + // In MI, we only need the low 32 bits of SrcMI, so we only consider the + // low 32 bits of the SrcMI mask. Note that in APInt, the lowest bit is + // at index 0, while in the PowerPC ISA, the lowest bit is at index 63. + APInt MaskSrc = + APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc); + // Current APInt::getBitsSetWithWrap sets all bits to 0 if loBit is + // equal to highBit. + // If MBSrc - MESrc == 1, we expect a full set mask instead of Null. + if (SrcMaskFull && (MBSrc - MESrc == 1)) + MaskSrc.setAllBits(); + + APInt RotatedSrcMask = MaskSrc.rotl(SHMI); + APInt FinalMask = RotatedSrcMask & MaskMI; + uint32_t NewMB, NewME; + + // If the final mask is 0, the result of MI should be 0 too.
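The APInt manipulation above is easier to sanity-check with a hedged 32-bit scalar model (plain C++, not the APInt code): rlwinm(x, SH, MB, ME) is rotl32(x, SH) & ppcMask(MB, ME), where PowerPC numbers bit 0 as the most significant bit and the mask wraps when MB > ME. Because rotation distributes over AND, two chained rlwinm ops compose into one rotate by (SHSrc + SHMI) % 32 under the intersected mask, which is encodable only when that mask is zero or still a run of ones:

```cpp
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned S) {
  S &= 31;
  return S == 0 ? V : (V << S) | (V >> (32 - S));
}

// PowerPC bit numbering: ppcMask(MB, ME) sets bits MB..ME counted from
// the most significant end, wrapping around when MB > ME.
static uint32_t ppcMask(unsigned MB, unsigned ME) {
  const uint32_t FromMB = 0xFFFFFFFFu >> MB;        // PPC bits MB..31
  const uint32_t UptoME = 0xFFFFFFFFu << (31 - ME); // PPC bits 0..ME
  return MB <= ME ? (FromMB & UptoME) : (FromMB | UptoME);
}

static uint32_t rlwinm(uint32_t X, unsigned SH, unsigned MB, unsigned ME) {
  return rotl32(X, SH) & ppcMask(MB, ME);
}

int main() {
  // Check the fold on one value: the chained form equals a single rotate
  // by (27 + 19) % 32 = 14 under the composed mask.
  const uint32_t X = 0xDEADBEEFu;
  const uint32_t Chained = rlwinm(rlwinm(X, 27, 5, 31), 19, 0, 12);
  const uint32_t Mask = rotl32(ppcMask(5, 31), 19) & ppcMask(0, 12);
  const uint32_t Folded = rotl32(X, (27 + 19) % 32) & Mask;
  return Chained == Folded ? 0 : 1;
}
```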
+ if (FinalMask.isNullValue()) { + bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 || + MI.getOpcode() == PPC::RLWINM8o); + + LLVM_DEBUG(dbgs() << "Replace Instr: "); + LLVM_DEBUG(MI.dump()); + + if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) { + // Replace MI with "LI 0" + MI.RemoveOperand(4); + MI.RemoveOperand(3); + MI.RemoveOperand(2); + MI.getOperand(1).ChangeToImmediate(0); + MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI)); + } else { + // Replace MI with "ANDIo reg, 0" + MI.RemoveOperand(4); + MI.RemoveOperand(3); + MI.getOperand(2).setImm(0); + MI.setDesc(TII->get(Is64Bit ? PPC::ANDIo8 : PPC::ANDIo)); + } + Simplified = true; + NumRotatesCollapsed++; + + LLVM_DEBUG(dbgs() << "With: "); + LLVM_DEBUG(MI.dump()); + } else if (isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, + NewME) || SrcMaskFull) { + // If FoldingReg has only one use and it is not RLWINMo or + // RLWINM8o, it is safe to delete its def SrcMI. Otherwise keep it. + if (MRI->hasOneNonDBGUse(FoldingReg) && + (SrcMI->getOpcode() == PPC::RLWINM || + SrcMI->getOpcode() == PPC::RLWINM8)) { + ToErase = SrcMI; + LLVM_DEBUG(dbgs() << "Delete dead instruction: "); + LLVM_DEBUG(SrcMI->dump()); + } + + LLVM_DEBUG(dbgs() << "Converting Instr: "); + LLVM_DEBUG(MI.dump()); + + uint16_t NewSH = (SHSrc + SHMI) % 32; + MI.getOperand(2).setImm(NewSH); + // If the SrcMI mask is full, there is no need to update MBMI and MEMI. + if (!SrcMaskFull) { + MI.getOperand(3).setImm(NewMB); + MI.getOperand(4).setImm(NewME); + } + MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); + if (SrcMI->getOperand(1).isKill()) { + MI.getOperand(1).setIsKill(true); + SrcMI->getOperand(1).setIsKill(false); + } else + // About to replace MI.getOperand(1), clear its kill flag. + MI.getOperand(1).setIsKill(false); + + Simplified = true; + NumRotatesCollapsed++; + + LLVM_DEBUG(dbgs() << "To: "); + LLVM_DEBUG(MI.dump()); + } + break; + } } } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index dcf64a5d6f9b8..7266d82a08b54 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -57,6 +57,7 @@ namespace PPC { DIR_PWR7, DIR_PWR8, DIR_PWR9, + DIR_PWR_FUTURE, DIR_64 }; } diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index d548e7ace68da..35f6d32a07db2 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -51,8 +51,8 @@ opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, cl::desc("Disable CTR loops for PPC")); static cl:: -opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden, - cl::desc("Disable PPC loop preinc prep")); +opt<bool> DisableInstrFormPrep("disable-ppc-instr-form-prep", cl::Hidden, + cl::desc("Disable PPC loop instr form prep")); static cl::opt<bool> VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", @@ -104,7 +104,7 @@ extern "C" void LLVMInitializePowerPCTarget() { #ifndef NDEBUG initializePPCCTRLoopsVerifyPass(PR); #endif - initializePPCLoopPreIncPrepPass(PR); + initializePPCLoopInstrFormPrepPass(PR); initializePPCTOCRegDepsPass(PR); initializePPCEarlyReturnPass(PR); initializePPCVSXCopyPass(PR); @@ -431,8 +431,8 @@ void PPCPassConfig::addIRPasses() { } bool PPCPassConfig::addPreISel() { - if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None) - addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine())); + if (!DisableInstrFormPrep && getOptLevel() != CodeGenOpt::None) +
addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine())); if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) addPass(createHardwareLoopsPass()); diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 380d718885251..7079498cd815e 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -651,8 +651,9 @@ unsigned PPCTTIImpl::getCacheLineSize() const { // On P7, P8 or P9 we have a cache line size of 128. unsigned Directive = ST->getCPUDirective(); + // Assume that Future CPU has the same cache line size as the others. if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || - Directive == PPC::DIR_PWR9) + Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) return 128; // On other processors return a default of 64 bytes. @@ -684,8 +685,9 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { // For P7 and P8, floating-point instructions have a 6-cycle latency and // there are two execution units, so unroll by 12x for latency hiding. // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready + // Assume that future is the same as the others. if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || - Directive == PPC::DIR_PWR9) + Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) return 12; // For most things, modern systems have two execution units (and diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index b9aa5cf32b590..1e562f3f54b59 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -581,10 +581,7 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, int64_t Offset = N->getOffset(); MVT XLenVT = Subtarget.getXLenVT(); - // Non-PIC TLS lowering should always use the LocalExec model. - TLSModel::Model Model = isPositionIndependent() - ? 
getTargetMachine().getTLSModel(N->getGlobal()) - : TLSModel::LocalExec; + TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); SDValue Addr; switch (Model) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index fe38c4ff02d33..b5343e8a83098 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -231,6 +231,9 @@ def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>; def : PatFpr64Fpr64; def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>; +def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2))>; +def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2, + 0b111))>; // fmadd: rs1 * rs2 + rs3 def : Pat<(fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3), diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp index 91cb35dd72f26..c5cce39747a9e 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp @@ -41,8 +41,12 @@ void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI, raw_ostream &O) { - if (MO.isReg()) - O << '%' << getRegisterName(MO.getReg()); + if (MO.isReg()) { + if (!MO.getReg()) + O << '0'; + else + O << '%' << getRegisterName(MO.getReg()); + } else if (MO.isImm()) O << MO.getImm(); else if (MO.isExpr()) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index f8adca740a681..7994176c4c265 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -3609,7 +3609,7 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, // Get the known-zero mask for the operand. 
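The SystemZ hunk just below rewrites (~Known.Zero).getActiveBits() as Known.getMaxValue().getActiveBits(); the two are equivalent (KnownBits::getMaxValue() returns ~Zero), the new form simply states the intent. A hedged miniature of the accounting:

```cpp
#include <cstdint>

// Miniature of the KnownBits idea: Zero/One record bits proven 0/1; any
// bit not known zero may be one, so the maximum possible value is ~Zero.
struct KnownBits32 {
  uint32_t Zero = 0;
  uint32_t One = 0;
  uint32_t getMaxValue() const { return ~Zero; }
};

static unsigned activeBits(uint32_t V) {
  unsigned N = 0;
  for (; V != 0; V >>= 1)
    ++N;
  return N;
}

int main() {
  KnownBits32 K;
  K.Zero = 0xFFFFFF00u; // upper 24 bits proven zero
  // CTPOP of such a value only has to look at the low 8 bits.
  return activeBits(K.getMaxValue()) == 8 ? 0 : 1;
}
```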
KnownBits Known = DAG.computeKnownBits(Op); - unsigned NumSignificantBits = (~Known.Zero).getActiveBits(); + unsigned NumSignificantBits = Known.getMaxValue().getActiveBits(); if (NumSignificantBits == 0) return DAG.getConstant(0, DL, VT); diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 8b334756611a4..041971ca7cb8f 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -2069,7 +2069,7 @@ let Predicates = [FeatureProcessorAssist] in { def PPA : SideEffectTernaryRRFc<"ppa", 0xB2E8, GR64, GR64, imm32zx4>; def : Pat<(int_s390_ppa_txassist GR32:$src), (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), - 0, 1)>; + zero_reg, 1)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index b5fbbc427a29b..17ce31f01ed75 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -3132,6 +3132,7 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) { case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int: if (ForcedVEXEncoding != VEXEncoding_EVEX) return Match_Unsupported; + break; } return Match_Success; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index f08fcb575bf00..1ccb9b7cbf748 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -12,6 +12,8 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" @@ -22,6 +24,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -114,12 +117,24 @@ class X86AsmBackend : public MCAsmBackend { assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); - // Check that uppper bits are either all zeros or all ones. - // Specifically ignore overflow/underflow as long as the leakage is - // limited to the lower bits. This is to remain compatible with - // other assemblers. - assert((Size == 0 || isIntN(Size * 8 + 1, Value)) && - "Value does not fit in the Fixup field"); + int64_t SignedValue = static_cast(Value); + if ((Target.isAbsolute() || IsResolved) && + getFixupKindInfo(Fixup.getKind()).Flags & + MCFixupKindInfo::FKF_IsPCRel) { + // check that PC relative fixup fits into the fixup size. + if (Size > 0 && !isIntN(Size * 8, SignedValue)) + Asm.getContext().reportError( + Fixup.getLoc(), "value of " + Twine(SignedValue) + + " is too large for field of " + Twine(Size) + + ((Size == 1) ? " byte." : " bytes.")); + } else { + // Check that uppper bits are either all zeros or all ones. + // Specifically ignore overflow/underflow as long as the leakage is + // limited to the lower bits. This is to remain compatible with + // other assemblers. 
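The applyFixup change above turns a silent truncation into a diagnostic for PC-relative fixups: the signed value must fit the field exactly, while the absolute case keeps the looser isIntN(Size * 8 + 1, V) assert for compatibility with other assemblers. A hedged model of the range test:

```cpp
#include <cstdint>

// Models llvm::isIntN for 0 < Bits < 64: does V fit in a signed
// Bits-wide field?
static bool fitsSignedN(int64_t V, unsigned Bits) {
  const int64_t Hi = (int64_t(1) << (Bits - 1)) - 1;
  const int64_t Lo = -Hi - 1;
  return V >= Lo && V <= Hi;
}

int main() {
  // A 1-byte PC-relative branch field holds [-128, 127]; a value of 130
  // previously wrapped silently and now takes the reportError path.
  const bool InRange = fitsSignedN(127, 8); // true
  const bool TooBig = fitsSignedN(130, 8);  // false
  return InRange && !TooBig ? 0 : 1;
}
```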
+ assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) && + "Value does not fit in the Fixup field"); + } for (unsigned i = 0; i != Size; ++i) Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 335127c6d0642..0bb23b03685c9 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -101,6 +101,248 @@ namespace X86 { COND_INVALID }; + + // The classification for the first instruction in macro fusion. + enum class FirstMacroFusionInstKind { + // TEST + Test, + // CMP + Cmp, + // AND + And, + // ADD, SUB + AddSub, + // INC, DEC + IncDec, + // Not valid as a first macro fusion instruction + Invalid + }; + + enum class SecondMacroFusionInstKind { + // JA, JB and variants. + AB, + // JE, JL, JG and variants. + ELG, + // JS, JP, JO and variants + SPO, + // Not a fusible jump. + Invalid, + }; + + /// classifyFirstOpcodeInMacroFusion - return the type of the first + /// instruction in macro-fusion. + inline FirstMacroFusionInstKind + classifyFirstOpcodeInMacroFusion(unsigned Opcode) { + switch (Opcode) { + default: + return FirstMacroFusionInstKind::Invalid; + // TEST + case X86::TEST16i16: + case X86::TEST16mr: + case X86::TEST16ri: + case X86::TEST16rr: + case X86::TEST32i32: + case X86::TEST32mr: + case X86::TEST32ri: + case X86::TEST32rr: + case X86::TEST64i32: + case X86::TEST64mr: + case X86::TEST64ri32: + case X86::TEST64rr: + case X86::TEST8i8: + case X86::TEST8mr: + case X86::TEST8ri: + case X86::TEST8rr: + return FirstMacroFusionInstKind::Test; + case X86::AND16i16: + case X86::AND16ri: + case X86::AND16ri8: + case X86::AND16rm: + case X86::AND16rr: + case X86::AND16rr_REV: + case X86::AND32i32: + case X86::AND32ri: + case X86::AND32ri8: + case X86::AND32rm: + case X86::AND32rr: + case X86::AND32rr_REV: + case X86::AND64i32: + case X86::AND64ri32: + case X86::AND64ri8: + case X86::AND64rm: + case X86::AND64rr: + case X86::AND64rr_REV: + case X86::AND8i8: + case X86::AND8ri: + case X86::AND8ri8: + case X86::AND8rm: + case X86::AND8rr: + case X86::AND8rr_REV: + return FirstMacroFusionInstKind::And; + // CMP + case X86::CMP16i16: + case X86::CMP16mr: + case X86::CMP16ri: + case X86::CMP16ri8: + case X86::CMP16rm: + case X86::CMP16rr: + case X86::CMP16rr_REV: + case X86::CMP32i32: + case X86::CMP32mr: + case X86::CMP32ri: + case X86::CMP32ri8: + case X86::CMP32rm: + case X86::CMP32rr: + case X86::CMP32rr_REV: + case X86::CMP64i32: + case X86::CMP64mr: + case X86::CMP64ri32: + case X86::CMP64ri8: + case X86::CMP64rm: + case X86::CMP64rr: + case X86::CMP64rr_REV: + case X86::CMP8i8: + case X86::CMP8mr: + case X86::CMP8ri: + case X86::CMP8ri8: + case X86::CMP8rm: + case X86::CMP8rr: + case X86::CMP8rr_REV: + return FirstMacroFusionInstKind::Cmp; + // ADD + case X86::ADD16i16: + case X86::ADD16ri: + case X86::ADD16ri8: + case X86::ADD16rm: + case X86::ADD16rr: + case X86::ADD16rr_REV: + case X86::ADD32i32: + case X86::ADD32ri: + case X86::ADD32ri8: + case X86::ADD32rm: + case X86::ADD32rr: + case X86::ADD32rr_REV: + case X86::ADD64i32: + case X86::ADD64ri32: + case X86::ADD64ri8: + case X86::ADD64rm: + case X86::ADD64rr: + case X86::ADD64rr_REV: + case X86::ADD8i8: + case X86::ADD8ri: + case X86::ADD8ri8: + case X86::ADD8rm: + case X86::ADD8rr: + case X86::ADD8rr_REV: + // SUB + case X86::SUB16i16: + case X86::SUB16ri: + case X86::SUB16ri8: + case X86::SUB16rm: + case X86::SUB16rr: + case X86::SUB16rr_REV: + case 
X86::SUB32i32: + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB32rm: + case X86::SUB32rr: + case X86::SUB32rr_REV: + case X86::SUB64i32: + case X86::SUB64ri32: + case X86::SUB64ri8: + case X86::SUB64rm: + case X86::SUB64rr: + case X86::SUB64rr_REV: + case X86::SUB8i8: + case X86::SUB8ri: + case X86::SUB8ri8: + case X86::SUB8rm: + case X86::SUB8rr: + case X86::SUB8rr_REV: + return FirstMacroFusionInstKind::AddSub; + // INC + case X86::INC16r: + case X86::INC16r_alt: + case X86::INC32r: + case X86::INC32r_alt: + case X86::INC64r: + case X86::INC8r: + // DEC + case X86::DEC16r: + case X86::DEC16r_alt: + case X86::DEC32r: + case X86::DEC32r_alt: + case X86::DEC64r: + case X86::DEC8r: + return FirstMacroFusionInstKind::IncDec; + } + } + + /// classifySecondCondCodeInMacroFusion - return the type of the second + /// instruction in macro-fusion. + inline SecondMacroFusionInstKind + classifySecondCondCodeInMacroFusion(X86::CondCode CC) { + if (CC == X86::COND_INVALID) + return SecondMacroFusionInstKind::Invalid; + + switch (CC) { + default: + return SecondMacroFusionInstKind::Invalid; + // JE,JZ + case X86::COND_E: + // JNE,JNZ + case X86::COND_NE: + // JL,JNGE + case X86::COND_L: + // JLE,JNG + case X86::COND_LE: + // JG,JNLE + case X86::COND_G: + // JGE,JNL + case X86::COND_GE: + return SecondMacroFusionInstKind::ELG; + // JB,JC + case X86::COND_B: + // JNA,JBE + case X86::COND_BE: + // JA,JNBE + case X86::COND_A: + // JAE,JNC,JNB + case X86::COND_AE: + return SecondMacroFusionInstKind::AB; + // JS + case X86::COND_S: + // JNS + case X86::COND_NS: + // JP,JPE + case X86::COND_P: + // JNP,JPO + case X86::COND_NP: + // JO + case X86::COND_O: + // JNO + case X86::COND_NO: + return SecondMacroFusionInstKind::SPO; + } + } + + inline bool isMacroFused(FirstMacroFusionInstKind FirstKind, + SecondMacroFusionInstKind SecondKind) { + switch (FirstKind) { + case X86::FirstMacroFusionInstKind::Test: + case X86::FirstMacroFusionInstKind::And: + return true; + case X86::FirstMacroFusionInstKind::Cmp: + case X86::FirstMacroFusionInstKind::AddSub: + return SecondKind == X86::SecondMacroFusionInstKind::AB || + SecondKind == X86::SecondMacroFusionInstKind::ELG; + case X86::FirstMacroFusionInstKind::IncDec: + return SecondKind == X86::SecondMacroFusionInstKind::ELG; + case X86::FirstMacroFusionInstKind::Invalid: + return false; + } + llvm_unreachable("unknown fusion type"); + } } // end namespace X86; /// X86II - This namespace holds all of the target specific flags that diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 6840fc12751da..0481a40d462ae 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -150,6 +150,18 @@ void initializeX86ExpandPseudoPass(PassRegistry &); void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); void initializeX86OptimizeLEAPassPass(PassRegistry &); void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &); + +namespace X86AS { +enum : unsigned { + GS = 256, + FS = 257, + SS = 258, + PTR32_SPTR = 270, + PTR32_UPTR = 271, + PTR64 = 272 +}; +} // End X86AS namespace + } // End llvm namespace #endif diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 3374cd054a6e1..799c1f5d1285e 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -1261,7 +1261,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, if (Is64Bit) { // Handle the 64-bit Windows ABI case where we need to call __chkstk. 
// Function prologue is responsible for adjusting the stack pointer. - int Alloc = isEAXAlive ? NumBytes - 8 : NumBytes; + int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes; if (isUInt<32>(Alloc)) { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(Alloc) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index d8f9c5f7270d2..3c33c4bb1f212 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2224,12 +2224,11 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, AM.Scale = cast(Mgs->getScale())->getZExtValue(); unsigned AddrSpace = cast(Parent)->getPointerInfo().getAddrSpace(); - // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS. - if (AddrSpace == 256) + if (AddrSpace == X86AS::GS) AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); - if (AddrSpace == 257) + if (AddrSpace == X86AS::FS) AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); - if (AddrSpace == 258) + if (AddrSpace == X86AS::SS) AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16); SDLoc DL(N); @@ -5222,12 +5221,20 @@ void X86DAGToDAGISel::Select(SDNode *Node) { } case ISD::STRICT_FADD: case ISD::STRICT_FSUB: + case ISD::STRICT_FP_ROUND: { + // X87 instructions has enabled these strict fp operation. + bool UsingFp80 = Node->getSimpleValueType(0) == MVT::f80 || + Node->getOperand(1).getSimpleValueType() == MVT::f80; + if (UsingFp80 || (!Subtarget->hasSSE1() && Subtarget->hasX87())) + break; + LLVM_FALLTHROUGH; + } case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: - case ISD::STRICT_FP_ROUND: // FIXME: Remove when we have isel patterns for strict versions of these // nodes. - CurDAG->mutateStrictFPToFP(Node); + if (!TLI->isStrictFPEnabled()) + CurDAG->mutateStrictFPToFP(Node); break; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c658363f8d6a1..a840ca429343b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -277,6 +277,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); } + // Handle address space casts between mixed sized pointers. + setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); + setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); + // TODO: when we have SSE, these could be more efficient, by using movd/movq. if (!X86ScalarSSEf64) { setOperationAction(ISD::BITCAST , MVT::f32 , Expand); @@ -587,6 +591,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSIN , VT, Expand); setOperationAction(ISD::FCOS , VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); + + // Handle constrained floating-point operations of scalar. + setOperationAction(ISD::STRICT_FMUL , VT, Legal); + setOperationAction(ISD::STRICT_FDIV , VT, Legal); + setOperationAction(ISD::STRICT_FSQRT , VT, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); + // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten + // as Custom. + setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); } } @@ -657,6 +670,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::LLROUND, MVT::f80, Expand); setOperationAction(ISD::LRINT, MVT::f80, Expand); setOperationAction(ISD::LLRINT, MVT::f80, Expand); + + // Handle constrained floating-point operations of scalar. 
+ setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal); + setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal); + setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal); + setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal); + setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal); + // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten + // as Custom. + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal); } // f128 uses xmm registers, but most operations require libcalls. @@ -690,7 +714,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::f128, LibCall); setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom); // We need to custom handle any FP_ROUND with an f128 input, but // LegalizeDAG uses the result type to know when to run a custom handler. // So we have to list all legal floating point result types here. @@ -2422,6 +2447,10 @@ bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { assert(SrcAS != DestAS && "Expected different address spaces!"); + const TargetMachine &TM = getTargetMachine(); + if (TM.getPointerSize(SrcAS) != TM.getPointerSize(DestAS)) + return false; + return SrcAS < 256 && DestAS < 256; } @@ -4978,12 +5007,6 @@ bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2(); } -bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, - bool IsSigned) const { - // f80 UINT_TO_FP is more efficient using Strict code if FCMOV is available. - return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov(); -} - bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const { if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) @@ -19691,15 +19714,20 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { // fp128 needs to use a libcall. if (SrcVT == MVT::f128) { RTLIB::Libcall LC; - if (Op.getOpcode() == ISD::FP_TO_SINT) + if (IsSigned) LC = RTLIB::getFPTOSINT(SrcVT, VT); else LC = RTLIB::getFPTOUINT(SrcVT, VT); - // FIXME: Strict fp! - assert(!IsStrict && "Unhandled strict operation!"); + SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); MakeLibCallOptions CallOptions; - return makeLibCall(DAG, LC, VT, Src, CallOptions, SDLoc(Op)).first; + std::pair Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions, + SDLoc(Op), Chain); + + if (IsStrict) + return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl); + + return Tmp.first; } // Fall back to X87. @@ -19714,9 +19742,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { } SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + bool IsStrict = Op->isStrictFPOpcode(); + SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); - SDValue In = Op.getOperand(0); + SDValue In = Op.getOperand(IsStrict ? 1 : 0); MVT SVT = In.getSimpleValueType(); if (VT == MVT::f128) { @@ -19725,6 +19755,8 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { } assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); + // FIXME: Strict fp. 
+ assert(!IsStrict && "Strict FP not supported yet!"); return DAG.getNode(X86ISD::VFPEXT, DL, VT, DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, @@ -19732,8 +19764,10 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { } SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + bool IsStrict = Op->isStrictFPOpcode(); + MVT VT = Op.getSimpleValueType(); - SDValue In = Op.getOperand(0); + SDValue In = Op.getOperand(IsStrict ? 1 : 0); MVT SVT = In.getSimpleValueType(); // It's legal except when f128 is involved @@ -19745,17 +19779,17 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { // FP_ROUND node has a second operand indicating whether it is known to be // precise. That doesn't take part in the LibCall so we can't directly use // LowerF128Call. + + SDLoc dl(Op); + SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); MakeLibCallOptions CallOptions; - return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first; -} + std::pair Tmp = makeLibCall(DAG, LC, VT, In, CallOptions, + dl, Chain); -// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking -// the default expansion of STRICT_FP_ROUND. -static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) { - // FIXME: Need to form a libcall with an input chain for f128. - assert(Op.getOperand(0).getValueType() != MVT::f128 && - "Don't know how to handle f128 yet!"); - return Op; + if (IsStrict) + return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl); + + return Tmp.first; } /// Depending on uarch and/or optimizing for size, we might prefer to use a @@ -24098,7 +24132,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, MFI.setHasCopyImplyingStackAdjustment(true); // Don't do anything here, we will expand these intrinsics out later // during FinalizeISel in EmitInstrWithCustomInserter. - return SDValue(); + return Op; } case Intrinsic::x86_lwpins32: case Intrinsic::x86_lwpins64: @@ -24273,9 +24307,11 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + SDValue Offset = DAG.getUNDEF(VMask.getValueType()); - return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT, - MemIntr->getMemOperand(), true /* truncating */); + return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, Offset, VMask, + MemVT, MemIntr->getMemOperand(), ISD::UNINDEXED, + true /* truncating */); } case X86ISD::VTRUNCUS: case X86ISD::VTRUNCS: { @@ -27586,12 +27622,11 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget, if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode())) return Op; - SDValue NewLoad = DAG.getMaskedLoad(VT, dl, N->getChain(), - N->getBasePtr(), Mask, - getZeroVector(VT, Subtarget, DAG, dl), - N->getMemoryVT(), N->getMemOperand(), - N->getExtensionType(), - N->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + getZeroVector(VT, Subtarget, DAG, dl), N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(), + N->isExpandingLoad()); // Emit a blend. 
SDValue Select = DAG.getNode(ISD::VSELECT, dl, MaskVT, Mask, NewLoad, PassThru); @@ -27625,11 +27660,10 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget, MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), - N->getBasePtr(), Mask, PassThru, - N->getMemoryVT(), N->getMemOperand(), - N->getExtensionType(), - N->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + WideDataVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + N->getExtensionType(), N->isExpandingLoad()); SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0), @@ -27675,7 +27709,8 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), - Mask, N->getMemoryVT(), N->getMemOperand(), + N->getOffset(), Mask, N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); } @@ -27729,6 +27764,29 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget, return DAG.getMergeValues({Extract, NewGather.getValue(2)}, dl); } +static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); + SDValue Src = Op.getOperand(0); + MVT DstVT = Op.getSimpleValueType(); + + AddrSpaceCastSDNode *N = cast(Op.getNode()); + unsigned SrcAS = N->getSrcAddressSpace(); + + assert(SrcAS != N->getDestAddressSpace() && + "addrspacecast must be between different address spaces"); + + if (SrcAS == X86AS::PTR32_UPTR && DstVT == MVT::i64) { + Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Src); + } else if (DstVT == MVT::i64) { + Op = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Src); + } else if (DstVT == MVT::i32) { + Op = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src); + } else { + report_fatal_error("Bad address space in addrspacecast"); + } + return Op; +} + SDValue X86TargetLowering::LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const { // TODO: Eventually, the lowering of these nodes should be informed by or @@ -27773,9 +27831,21 @@ SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op, SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, RTLIB::Libcall Call) const { - SmallVector Ops(Op->op_begin(), Op->op_end()); + + bool IsStrict = Op->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SmallVector Ops(Op->op_begin() + Offset, Op->op_end()); + + SDLoc dl(Op); + SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); MakeLibCallOptions CallOptions; - return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first; + std::pair Tmp = makeLibCall(DAG, Call, MVT::f128, Ops, + CallOptions, dl, Chain); + + if (IsStrict) + return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl); + + return Tmp.first; } /// Provide custom lowering hooks for some operations. 
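// ---------------------------------------------------------------------------
// A hedged illustrative sketch (not part of this patch): the source-level
// constructs that feed LowerADDRSPACECAST above, assuming clang with
// -fms-extensions targeting x86_64, where `__ptr32 __uptr` is mapped to
// address space 271 (X86AS::PTR32_UPTR) and `__ptr32 __sptr` to 270
// (X86AS::PTR32_SPTR); plain pointers live in address space 0.
void widenMixedPointers(int *__ptr32 __uptr U, int *__ptr32 __sptr S) {
  int *P1 = U; // addrspacecast 271 -> 0: lowered as ZERO_EXTEND to i64
  int *P2 = S; // addrspacecast 270 -> 0: lowered as SIGN_EXTEND to i64
  // Casting back narrows: addrspacecast 0 -> 271 lowers to TRUNCATE to i32.
  int *__ptr32 __uptr N = (int *__ptr32 __uptr)P1;
  (void)P2;
  (void)N;
}
// ---------------------------------------------------------------------------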
@@ -27825,9 +27895,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); - case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); - case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); - case ISD::STRICT_FP_ROUND: return LowerSTRICT_FP_ROUND(Op, DAG); + case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG); case ISD::STORE: return LowerStore(Op, Subtarget, DAG); case ISD::FADD: @@ -27902,6 +27973,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GC_TRANSITION_START: return LowerGC_TRANSITION_START(Op, DAG); case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION_END(Op, DAG); + case ISD::ADDRSPACECAST: + return LowerADDRSPACECAST(Op, DAG); } } @@ -28691,6 +28764,28 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res.getValue(1)); return; } + case ISD::ADDRSPACECAST: { + SDValue Src = N->getOperand(0); + EVT DstVT = N->getValueType(0); + AddrSpaceCastSDNode *CastN = cast(N); + unsigned SrcAS = CastN->getSrcAddressSpace(); + + assert(SrcAS != CastN->getDestAddressSpace() && + "addrspacecast must be between different address spaces"); + + SDValue Res; + if (SrcAS == X86AS::PTR32_UPTR && DstVT == MVT::i64) + Res = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Src); + else if (DstVT == MVT::i64) + Res = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Src); + else if (DstVT == MVT::i32) + Res = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src); + else + report_fatal_error("Unrecognized addrspacecast type legalization"); + + Results.push_back(Res); + return; + } } } @@ -40433,6 +40528,7 @@ static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp, static SDValue reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + assert(ML->isUnindexed() && "Unexpected indexed masked load!"); // TODO: This is not x86-specific, so it could be lifted to DAGCombiner. // However, some target hooks may need to be added to know when the transform // is profitable. Endianness would also have to be considered. @@ -40460,6 +40556,7 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, static SDValue combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + assert(ML->isUnindexed() && "Unexpected indexed masked load!"); if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode())) return SDValue(); @@ -40495,10 +40592,10 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG, // The new masked load has an undef pass-through operand. The select uses the // original pass-through operand. 
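// ---------------------------------------------------------------------------
// A hedged illustrative sketch (not part of this patch): the updated
// SelectionDAG::getMaskedLoad signature that the call sites below migrate to.
// It now takes an Offset operand (undef for unindexed loads) and an
// ISD::MemIndexedMode; here both are simply copied from an existing node
// (assumes the enclosing llvm namespace).
static SDValue remakeMaskedLoad(SelectionDAG &DAG, MaskedLoadSDNode *N,
                                EVT VT, SDValue PassThru, const SDLoc &dl) {
  return DAG.getMaskedLoad(VT, dl, N->getChain(), N->getBasePtr(),
                           N->getOffset(), N->getMask(), PassThru,
                           N->getMemoryVT(), N->getMemOperand(),
                           N->getAddressingMode(), // ISD::UNINDEXED here
                           N->getExtensionType(), N->isExpandingLoad());
}
// ---------------------------------------------------------------------------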
- SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(), - ML->getMask(), DAG.getUNDEF(VT), - ML->getMemoryVT(), ML->getMemOperand(), - ML->getExtensionType()); + SDValue NewML = DAG.getMaskedLoad( + VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(), + DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(), + ML->getAddressingMode(), ML->getExtensionType()); SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getPassThru()); @@ -40584,8 +40681,9 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), Mst->getMemoryVT())) { return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0), - Mst->getBasePtr(), Mask, - Mst->getMemoryVT(), Mst->getMemOperand(), true); + Mst->getBasePtr(), Mst->getOffset(), Mask, + Mst->getMemoryVT(), Mst->getMemOperand(), + Mst->getAddressingMode(), true); } return SDValue(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 576f2fa627cce..82f56f895a191 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1126,9 +1126,6 @@ namespace llvm { bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override; - bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, - bool IsSigned) const override; - /// Return true if EXTRACT_SUBVECTOR is cheap for this result type /// with this index. bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 5051d5453f3ac..5917894dd3ee7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2078,7 +2078,7 @@ multiclass avx512_cmp_scalar, EVEX_4V, VEX_LIG, Sched<[sched]>; + timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC; let mayLoad = 1 in defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2089,8 +2089,9 @@ multiclass avx512_cmp_scalar, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; + let Uses = [MXCSR] in defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), @@ -2111,7 +2112,7 @@ multiclass avx512_cmp_scalar, - EVEX_4V, VEX_LIG, Sched<[sched]>; + EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC; def rm : AVX512Ii8<0xC2, MRMSrcMem, (outs _.KRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), @@ -2121,7 +2122,7 @@ multiclass avx512_cmp_scalar, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } } @@ -2522,6 +2523,7 @@ def X86cmpm_imm_commute : SDNodeXForm { +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc), "vcmp"#_.Suffix, @@ -2553,6 +2555,7 @@ multiclass avx512_vcmp_common, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + } // Patterns for selecting with loads in other operand. def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), @@ -2582,6 +2585,7 @@ multiclass avx512_vcmp_common { // comparison code form (VCMP[EQ/LT/LE/...] 
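// ---------------------------------------------------------------------------
// A hedged illustrative sketch (not part of this patch): the SIMD_EXC mix-in
// appended to many defs in this file is assumed to be a TableGen class that
// bundles the implicit MXCSR use with the may-raise-FP-exception flag,
// mirroring what the explicit `let Uses = [MXCSR], mayRaiseFPException = 1`
// blocks spell out inline:
class SIMD_EXC_SKETCH {
  list<Register> Uses = [MXCSR];
  bit mayRaiseFPException = 1;
}
// ---------------------------------------------------------------------------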
+ let Uses = [MXCSR] in defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, @@ -2639,7 +2643,7 @@ def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2), multiclass avx512_scalar_fpclass opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, Predicate prd> { - let Predicates = [prd], ExeDomain = _.ExeDomain in { + let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in { def rr : AVX512 opc, string OpcodeStr, multiclass avx512_vector_fpclass opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, string mem>{ - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { def rr : AVX512; multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, X86FoldableSchedWrite sched, bit IsCommutable> { - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { defm rr_Int : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo _, SDNode VecNode, X86FoldableSchedWrite sched, bit IsCommutable = 0> { - let ExeDomain = _.ExeDomain in + let ExeDomain = _.ExeDomain, Uses = [MXCSR] in defm rrb_Int : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, _.RC:$src2))>, - Sched<[sched]>; + Sched<[sched]>, SIMD_EXC; defm rm_Int : AVX512_maskable_scalar, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; - let isCodeGenOnly = 1, Predicates = [HasAVX512] in { + let isCodeGenOnly = 1, Predicates = [HasAVX512], + Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -5356,6 +5361,7 @@ multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, EVEX2VEXOverride; } + let Uses = [MXCSR] in defm rrb_Int : AVX512_maskable_scalar opc, string OpcodeStr, } defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, SchedWriteFCmp.Scl, "VMINCSS">, XS, - EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; + EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, SchedWriteFCmp.Scl, "VMINCSD">, XD, VEX_W, EVEX_4V, VEX_LIG, - EVEX_CD8<64, CD8VT1>; + EVEX_CD8<64, CD8VT1>, SIMD_EXC; defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, SchedWriteFCmp.Scl, "VMAXCSS">, XS, - EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; + EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, SchedWriteFCmp.Scl, "VMAXCSD">, XD, VEX_W, EVEX_4V, VEX_LIG, - EVEX_CD8<64, CD8VT1>; + EVEX_CD8<64, CD8VT1>, SIMD_EXC; multiclass avx512_fp_packed opc, string OpcodeStr, SDPatternOperator OpNode, X86VectorVTInfo _, X86FoldableSchedWrite sched, bit IsCommutable, bit IsKCommutable = IsCommutable> { - let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { + let ExeDomain = _.ExeDomain, hasSideEffects = 0, + Uses = [MXCSR], mayRaiseFPException = 1 in { defm rr: AVX512_maskable opc, string OpcodeStr, SDPatternOperator OpN multiclass avx512_fp_round_packed opc, string OpcodeStr, SDPatternOperator OpNodeRnd, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in + 
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in defm rrb: AVX512_maskable opc, string OpcodeStr, multiclass avx512_fp_sae_packed opc, string OpcodeStr, SDPatternOperator OpNodeSAE, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in + let ExeDomain = _.ExeDomain, Uses = [MXCSR] in defm rrb: AVX512_maskable opc, string OpcodeStr, SDPatternOperator Op } } +let Uses = [MXCSR] in multiclass avx512_fp_binop_p_round opc, string OpcodeStr, SDNode OpNodeRnd, X86SchedWriteSizes sched> { defm PSZ : avx512_fp_round_packed opc, string OpcodeStr, SDNode OpNodeR EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } +let Uses = [MXCSR] in multiclass avx512_fp_binop_p_sae opc, string OpcodeStr, SDNode OpNodeRnd, X86SchedWriteSizes sched> { defm PSZ : avx512_fp_sae_packed; } +let Uses = [], mayRaiseFPException = 0 in { defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, SchedWriteFLogicSizes, 1>; defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, @@ -5578,10 +5588,11 @@ defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, SchedWriteFLogicSizes, 1>; defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, SchedWriteFLogicSizes, 1>; +} multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { defm rr: AVX512_maskable opc, string OpcodeStr, SDNode OpNode, multiclass avx512_fp_scalef_scalar opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { defm rr: AVX512_maskable_scalar opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { - let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, + Uses = [MXCSR], mayRaiseFPException = 1 in { defm r: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, multiclass avx512_fma3_213_round opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { - let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, + Uses = [MXCSR] in defm rb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { - let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, + Uses = [MXCSR], mayRaiseFPException = 1 in { defm r: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, multiclass avx512_fma3_231_round opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { - let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, + Uses = [MXCSR] in defm rb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { - let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in { + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, + Uses = [MXCSR], mayRaiseFPException = 1 in { defm r: AVX512_maskable_3src 
opc, string OpcodeStr, SDNode OpNode, multiclass avx512_fma3_132_round opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { - let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, + Uses = [MXCSR] in defm rb: AVX512_maskable_3src, - AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>; + AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC; let mayLoad = 1 in defm m_Int: AVX512_maskable_3src_scalar, - AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>; + AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC; + let Uses = [MXCSR] in defm rb_Int: AVX512_maskable_3src_scalar, @@ -6648,13 +6666,14 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in { (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>; + !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC; def m : AVX512FMA3S, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>; + [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC; + let Uses = [MXCSR] in def rb : AVX512FMA3S opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched, RegisterClass SrcRC, X86VectorVTInfo DstVT, X86MemOperand x86memop, PatFrag ld_frag, string asm, - string mem> { + string mem, list _Uses = [MXCSR], + bit _mayRaiseFPException = 1> { +let ExeDomain = DstVT.ExeDomain, Uses = _Uses, + mayRaiseFPException = _mayRaiseFPException in { let hasSideEffects = 0, isCodeGenOnly = 1 in { def rr : SI opc, SDPatternOperator OpNode, X86FoldableSched (OpNode (DstVT.VT DstVT.RC:$src1), (ld_frag addr:$src2)))]>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; +} def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", (!cast(NAME#"rr_Int") DstVT.RC:$dst, DstVT.RC:$src1, SrcRC:$src2), 0, "att">; @@ -7032,6 +7055,7 @@ multiclass avx512_vcvtsi_round opc, SDNode OpNode, X86FoldableSchedWrite sched, RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm, string mem> { + let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in def rrb_Int : SI, XS, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32, - v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">, + v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SD, GR64, @@ -7105,7 +7129,7 @@ defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info, - i32mem, loadi32, "cvtusi2sd", "l">, + i32mem, loadi32, "cvtusi2sd", "l", [], 0>, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SD, GR64, @@ -7145,11 +7169,12 @@ multiclass avx512_cvt_s_int_round opc, X86VectorVTInfo SrcVT, SDNode OpNodeRnd, X86FoldableSchedWrite sched, string asm, string aliasStr> { - let Predicates = [HasAVX512] in { + let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in { def rr_Int : SI, - EVEX, VEX_LIG, Sched<[sched]>; + EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; + let Uses = [MXCSR] in def rrb_Int : SI, @@ -7159,7 +7184,7 @@ 
multiclass avx512_cvt_s_int_round opc, X86VectorVTInfo SrcVT,
                   !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                   [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
-                  EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+                  EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
   } // Predicates = [HasAVX512]
 
   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
@@ -7286,22 +7311,23 @@ multiclass avx512_cvt_s_all opc, string asm, X86VectorVTInfo _SrcRC,
                             X86VectorVTInfo _DstRC, SDNode OpNode,
                             SDNode OpNodeInt, SDNode OpNodeSAE,
                             X86FoldableSchedWrite sched, string aliasStr>{
-let Predicates = [HasAVX512] in {
+let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
   let isCodeGenOnly = 1 in {
   def rr : AVX512,
-           EVEX, VEX_LIG, Sched<[sched]>;
+           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
   def rm : AVX512,
-           EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+           EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
   }
   def rr_Int : AVX512,
-               EVEX, VEX_LIG, Sched<[sched]>;
+               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+  let Uses = [MXCSR] in
   def rrb_Int : AVX512,
@@ -7311,7 +7337,7 @@ let Predicates = [HasAVX512] in {
                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
-                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
 } //HasAVX512
 
 def : InstAlias opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNode, X86FoldableSchedWrite sched> {
@@ -7387,6 +7414,7 @@ multiclass avx512_cvt_fp_scalar opc, string OpcodeStr, X86VectorVTInfo _
 multiclass avx512_cvt_fp_sae_scalar opc, string OpcodeStr, X86VectorVTInfo _,
                                     X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                     X86FoldableSchedWrite sched> {
+  let Uses = [MXCSR] in
   defm rrb_Int : AVX512_maskable_scalar opc, string OpcodeStr, X86VectorVTIn
 multiclass avx512_cvt_fp_rc_scalar opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                    X86FoldableSchedWrite sched> {
+  let Uses = [MXCSR] in
   defm rrb_Int : AVX512_maskable_scalar opc, string OpcodeStr, X86VectorVTInfo _,
                  string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                  RegisterClass MaskRC = _.KRCWM,
                  dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
-
+let Uses = [MXCSR], mayRaiseFPException = 1 in {
   defm rr : AVX512_maskable_common opc, string OpcodeStr, X86VectorVTInfo _,
                    _.RC:$src0), vselect, "$src0 = $dst">,
            EVEX, EVEX_B, Sched<[sched.Folded]>;
+  }
 }
 
 // Conversion with SAE - suppress all exceptions
 multiclass avx512_vcvt_fp_sae opc, string OpcodeStr, X86VectorVTInfo _,
                               X86VectorVTInfo _Src, SDNode OpNodeSAE,
                               X86FoldableSchedWrite sched> {
+  let Uses = [MXCSR] in
   defm rrb : AVX512_maskable opc, string OpcodeStr, X86VectorVTInfo _,
 multiclass avx512_vcvt_fp_rc opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeRnd,
                              X86FoldableSchedWrite sched> {
+  let Uses = [MXCSR] in
   defm rrb : AVX512_maskable, mayRaiseFPException = 0 in
 multiclass avx512_cvtdq2pd opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNode128, X86SchedWriteWidths sched> {
 // No rounding in this op
@@ -8521,6 +8554,7 @@ def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
 // Half precision conversion instructions
 //===----------------------------------------------------------------------===//
+let Uses = [MXCSR], mayRaiseFPException = 1 in
 multiclass avx512_cvtph2ps {
@@ -8537,6 +8571,7 @@ multiclass avx512_cvtph2ps {
+  let Uses = [MXCSR] in
   defm rrb :
AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), (ins _src.RC:$src), "vcvtph2ps", "{sae}, $src", "$src, {sae}", @@ -8568,7 +8603,7 @@ let Predicates = [HasVLX] in { multiclass avx512_cvtps2ph { -let ExeDomain = GenericDomain in { +let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -8605,7 +8640,7 @@ let ExeDomain = GenericDomain in { multiclass avx512_cvtps2ph_sae { - let hasSideEffects = 0 in + let hasSideEffects = 0, Uses = [MXCSR] in defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, (outs _dest.RC:$dst), (ins _src.RC:$src1, i32u8imm:$src2), @@ -8664,52 +8699,53 @@ let Predicates = [HasVLX] in { // Unordered/Ordered scalar fp compare with Sae and set EFLAGS multiclass avx512_ord_cmp_sae opc, X86VectorVTInfo _, - string OpcodeStr, X86FoldableSchedWrite sched> { - let hasSideEffects = 0 in + string OpcodeStr, Domain d, + X86FoldableSchedWrite sched = WriteFCom> { + let hasSideEffects = 0, Uses = [MXCSR] in def rrb: AVX512, EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { - defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>, + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>, + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; - defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>, + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>, + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, - "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, + "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFCom>, PD, EVEX, + "ucomisd", SSEPackedDouble>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, - "comiss", WriteFCom>, PS, EVEX, VEX_LIG, + "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, - "comisd", WriteFCom>, PD, EVEX, + "comisd", SSEPackedDouble>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } let isCodeGenOnly = 1 in { defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, + sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX, + sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG, + sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 
EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFCom>, PD, EVEX, + sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } @@ -8717,7 +8753,7 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { + let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in { defm rr : AVX512_maskable_scalar opc, string OpcodeStr, SDNode OpNode, } } +let Uses = [MXCSR] in multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched> { defm PSZ : avx512_fp14_p; multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, X86FoldableSchedWrite sched> { - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { defm r : AVX512_maskable_scalar, - Sched<[sched]>; + Sched<[sched]>, SIMD_EXC; defm rb : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } } @@ -8840,7 +8877,7 @@ defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode, X86FoldableSchedWrite sched> { - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { defm r : AVX512_maskable, @@ -8862,7 +8899,7 @@ multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, } multiclass avx512_fp28_p_sae opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode, X86FoldableSchedWrite sched> { - let ExeDomain = _.ExeDomain in + let ExeDomain = _.ExeDomain, Uses = [MXCSR] in defm rb : AVX512_maskable opc, string OpcodeStr, multiclass avx512_sqrt_packed opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { defm r: AVX512_maskable, EVEX, @@ -8942,6 +8979,7 @@ multiclass avx512_sqrt_packed opc, string OpcodeStr, } } +let Uses = [MXCSR], mayRaiseFPException = 1 in multiclass avx512_sqrt_packed_all opc, string OpcodeStr, X86SchedWriteSizes sched> { defm PSZ : avx512_sqrt_packed opc, string OpcodeStr, } } +let Uses = [MXCSR] in multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr, X86SchedWriteSizes sched> { defm PSZ : avx512_sqrt_packed_round opc, string OpcodeStr, X86FoldableSchedWri "$src2, $src1", "$src1, $src2", (X86fsqrts (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, - Sched<[sched]>; + Sched<[sched]>, SIMD_EXC; defm m_Int : AVX512_maskable_scalar, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; + let Uses = [MXCSR] in defm rb_Int : AVX512_maskable_scalar opc, string OpcodeStr, X86FoldableSchedWri def r : I, - Sched<[sched]>; + Sched<[sched]>, SIMD_EXC; let mayLoad = 1 in def m : I, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } } @@ -9047,8 +9087,9 @@ multiclass avx512_rndscale_scalar opc, string OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT 
(X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 timm:$src3)))>, - Sched<[sched]>; + Sched<[sched]>, SIMD_EXC; + let Uses = [MXCSR] in defm rb_Int : AVX512_maskable_scalar opc, string OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales _.RC:$src1, _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { def r : I, Sched<[sched]>; + []>, Sched<[sched]>, SIMD_EXC; let mayLoad = 1 in def m : I, Sched<[sched.Folded, sched.ReadAfterFold]>; + []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } } @@ -10101,7 +10142,7 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256, //all instruction created with FROUND_CURRENT multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { defm rri : AVX512_maskable opc, string OpcodeStr, SDNode OpNo multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in + let ExeDomain = _.ExeDomain, Uses = [MXCSR] in defm rrib : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { defm rri : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, // op(reg_vec2,mem_scalar,imm) multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { defm rri : AVX512_maskable_scalar opc, string OpcodeStr, SDNode OpNode, multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in + let ExeDomain = _.ExeDomain, Uses = [MXCSR] in defm rrib : AVX512_maskable opc, string OpcodeStr, //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in + let ExeDomain = _.ExeDomain, Uses = [MXCSR] in defm NAME#rrib : AVX512_maskable_scalar, mayRaiseFPException = 0 in { defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512, SchedWriteFShuffleSizes, 0, 1>; defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512, SchedWriteFShuffleSizes>; +} defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, SchedWriteShuffle, HasBWI>; @@ -11587,7 +11630,8 @@ let Predicates = [HasVLX] in { multiclass avx512_fixupimm_packed opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo TblVT>{ - let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { + let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, + Uses = [MXCSR], mayRaiseFPException = 1 in { defm rri : AVX512_maskable_3src opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo TblVT> : avx512_fixupimm_packed { -let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { +let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in { defm 
rrib : AVX512_maskable_3src opc, string OpcodeStr, (X86VFixupimms (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), - (i32 timm:$src4))>, Sched<[sched]>; + (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC; + let Uses = [MXCSR] in defm rrib : AVX512_maskable_3src_scalar opc, string OpcodeStr, (_src3VT.VT (scalar_to_vector (_src3VT.ScalarLdFrag addr:$src3))), (i32 timm:$src4))>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } } @@ -12166,7 +12211,7 @@ defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", //===----------------------------------------------------------------------===// let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle, - Constraints = "$src1 = $dst" in { + Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in { defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info, (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), "v4fmaddps", "$src3, $src2", "$src2, $src3", @@ -12297,17 +12342,19 @@ defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", // Truncate Float to BFloat16 multiclass avx512_cvtps2bf16 opc, string OpcodeStr, X86SchedWriteWidths sched> { - let Predicates = [HasBF16] in { + let Predicates = [HasBF16], Uses = [], mayRaiseFPException = 0 in { defm Z : avx512_vcvt_fp, EVEX_V512; } let Predicates = [HasBF16, HasVLX] in { + let Uses = [], mayRaiseFPException = 0 in { defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; + } def : InstAlias(NAME # "Z128rr") VR128X:$dst, diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td index 0cca71bdc4315..4c84f4f2460db 100644 --- a/llvm/lib/Target/X86/X86InstrFMA.td +++ b/llvm/lib/Target/X86/X86InstrFMA.td @@ -95,7 +95,8 @@ multiclass fma3p_rm_132 opc, string OpcodeStr, RegisterClass RC, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } -let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in +let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1, + Uses = [MXCSR], mayRaiseFPException = 1 in multiclass fma3p_forms opc132, bits<8> opc213, bits<8> opc231, string OpcodeStr, string PackTy, string Suff, PatFrag MemFrag128, PatFrag MemFrag256, @@ -237,7 +238,7 @@ multiclass fma3s_rm_132 opc, string OpcodeStr, } let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1, - hasSideEffects = 0 in + hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, string OpStr, string PackTy, string Suff, SDNode OpNode, RegisterClass RC, @@ -263,7 +264,8 @@ multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, // the lowest element of the FMA*_Int instruction. Even though such analysis // may be not implemented yet we allow the routines doing the actual commute // transformation to decide if one or another instruction is commutable or not. 
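// ---------------------------------------------------------------------------
// A hedged illustrative sketch (not part of this patch): why wrapping a whole
// multiclass in `let Uses = [MXCSR], mayRaiseFPException = 1 in`, as the FMA
// changes below do, is sufficient. A top-level `let` distributes its fields
// onto every instruction record the multiclass expands to, so both register
// and memory forms pick up the flags without per-def edits:
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma_sketch<bits<8> o, string Name> {
  def rr : I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), Name, []>;
  def rm : I<o, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), Name, []>;
}
// ---------------------------------------------------------------------------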
-let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
+let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0,
+    Uses = [MXCSR], mayRaiseFPException = 1 in
 multiclass fma3s_rm_int opc, string OpcodeStr,
                         Operand memopr, RegisterClass RC,
                         X86FoldableSchedWrite sched> {
@@ -384,6 +386,7 @@ defm : scalar_fma_patterns opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, ValueType OpVT, SDNode OpNode, PatFrag mem_frag, X86FoldableSchedWrite sched> {
@@ -425,7 +428,8 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
 multiclass fma4s_int opc, string OpcodeStr, Operand memop, ValueType VT,
                      X86FoldableSchedWrite sched> {
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0,
+    Uses = [MXCSR], mayRaiseFPException = 1 in {
   def rr_Int : FMA4S_Int opc, string OpcodeStr, SDNode OpNode, ValueType OpVT128, ValueType OpVT256, PatFrag ld_frag128, PatFrag ld_frag256,
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index 1b7a2ccde51fa..d9cf560831300 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -286,26 +286,26 @@ let Uses = [FPCW], mayRaiseFPException = 1 in {
 // FPBinary_rr just defines pseudo-instructions, no need to set scheduling
 // resources.
 let hasNoSchedulingInfo = 1 in {
-defm ADD : FPBinary_rr;
-defm SUB : FPBinary_rr;
-defm MUL : FPBinary_rr;
-defm DIV : FPBinary_rr;
+defm ADD : FPBinary_rr;
+defm SUB : FPBinary_rr;
+defm MUL : FPBinary_rr;
+defm DIV : FPBinary_rr;
 }
 // Sets the scheduling resources for the actual NAME#_Fm definitions.
 let SchedRW = [WriteFAddLd] in {
-defm ADD : FPBinary;
-defm SUB : FPBinary;
-defm SUBR: FPBinary;
+defm ADD : FPBinary;
+defm SUB : FPBinary;
+defm SUBR: FPBinary;
 }
 let SchedRW = [WriteFMulLd] in {
-defm MUL : FPBinary;
+defm MUL : FPBinary;
 }
 let SchedRW = [WriteFDivLd] in {
-defm DIV : FPBinary;
-defm DIVR: FPBinary;
+defm DIV : FPBinary;
+defm DIVR: FPBinary;
 }
 } // Uses = [FPCW], mayRaiseFPException = 1
@@ -366,7 +366,7 @@ defm ABS : FPUnary;
 let Uses = [FPCW], mayRaiseFPException = 1 in {
 let SchedRW = [WriteFSqrt80] in
-defm SQRT: FPUnary;
+defm SQRT: FPUnary;
 
 let SchedRW = [WriteFCom] in {
 let hasSideEffects = 0 in {
@@ -790,19 +790,19 @@ def : Pat<(X86fist64 RFP80:$src, addr:$op),
           (IST_Fp64m80 addr:$op, RFP80:$src)>;
 
 // FP extensions map onto simple pseudo-value conversions if they are to/from
 // the FP stack.
-def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
+def : Pat<(f64 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
           Requires<[FPStackf32]>;
-def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
+def : Pat<(f80 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
           Requires<[FPStackf32]>;
-def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
+def : Pat<(f80 (any_fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
           Requires<[FPStackf64]>;
 
 // FP truncations map onto simple pseudo-value conversions if they are to/from
 // the FP stack. We have validated that only value-preserving truncations make
 // it through isel.
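// ---------------------------------------------------------------------------
// A hedged illustrative sketch (not part of this patch): why the patterns
// below can switch from fpround/fpextend to the any_* forms. The any_*
// fragments are assumed to be defined upstream (TargetSelectionDAG.td) as
// PatFrags matching both the ordinary and the constrained (strict) node,
// roughly:
def any_fpround_sketch : PatFrags<(ops node:$src),
                                  [(strict_fpround node:$src),
                                   (fpround node:$src)]>;
def any_fpextend_sketch : PatFrags<(ops node:$src),
                                   [(strict_fpextend node:$src),
                                    (fpextend node:$src)]>;
// ---------------------------------------------------------------------------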
-def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>, +def : Pat<(f32 (any_fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>, Requires<[FPStackf32]>; -def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>, +def : Pat<(f32 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>, Requires<[FPStackf32]>; -def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>, +def : Pat<(f64 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>, Requires<[FPStackf64]>; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index de6f8a81dff65..1a4f7e1e6bbd6 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -706,6 +706,10 @@ def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>; def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>; def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>; +def SDTX86MaskedStore: SDTypeProfile<0, 3, [ // masked store + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2> +]>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// @@ -1040,9 +1044,10 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec, INSERT_get_vinsert256_imm>; def masked_load : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_ld node:$src1, node:$src2, node:$src3), [{ + (masked_ld node:$src1, undef, node:$src2, node:$src3), [{ return !cast(N)->isExpandingLoad() && - cast(N)->getExtensionType() == ISD::NON_EXTLOAD; + cast(N)->getExtensionType() == ISD::NON_EXTLOAD && + cast(N)->isUnindexed(); }]>; def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), @@ -1055,17 +1060,19 @@ def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), }]>; def X86mExpandingLoad : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_ld node:$src1, node:$src2, node:$src3), [{ - return cast(N)->isExpandingLoad(); + (masked_ld node:$src1, undef, node:$src2, node:$src3), [{ + return cast(N)->isExpandingLoad() && + cast(N)->isUnindexed(); }]>; // Masked store fragments. // X86mstore can't be implemented in core DAG files because some targets // do not support vector types (llvm-tblgen will fail). 
 def masked_store : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                           (masked_st node:$src1, node:$src2, node:$src3), [{
-  return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
-         (!cast<MaskedStoreSDNode>(N)->isCompressingStore());
+                           (masked_st node:$src1, node:$src2, undef, node:$src3), [{
+  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+         !cast<MaskedStoreSDNode>(N)->isCompressingStore() &&
+         cast<MaskedStoreSDNode>(N)->isUnindexed();
 }]>;
 
 def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -1078,16 +1085,18 @@ def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
 
 def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                                   (masked_st node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedStoreSDNode>(N)->isCompressingStore();
+                                   (masked_st node:$src1, node:$src2, undef, node:$src3), [{
+  return cast<MaskedStoreSDNode>(N)->isCompressingStore() &&
+         cast<MaskedStoreSDNode>(N)->isUnindexed();
 }]>;
 
 // Masked truncstore fragments.
 // X86mtruncstore can't be implemented in core DAG files because some targets
 // don't support vector types (llvm-tblgen will fail).
 def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                             (masked_st node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+                             (masked_st node:$src1, node:$src2, undef, node:$src3), [{
+  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+         cast<MaskedStoreSDNode>(N)->isUnindexed();
 }]>;
 
 def masked_truncstorevi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -1111,10 +1120,10 @@ def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES", SDTStore,
 def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS", SDTStore,
                              [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
-def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTMaskedStore,
+def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTX86MaskedStore,
                              [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
-def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTMaskedStore,
+def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTX86MaskedStore,
                               [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
 def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr),
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index b66d9ffd5d5e2..41c6fc4aaf673 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1761,10 +1761,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
   case X86::VCMPPSZ128rrik:
   case X86::VCMPPDZ256rrik:
   case X86::VCMPPSZ256rrik: {
-    unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x1f;
+    unsigned Imm =
+        MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 0x1f;
     Imm = X86::getSwappedVCMPImm(Imm);
     auto &WorkingMI = cloneIfNew(MI);
-    WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
+    WorkingMI.getOperand(MI.getNumExplicitOperands() - 1).setImm(Imm);
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);
   }
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index ffdcb65c93bd9..b8e80bcd566a8 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -823,7 +823,9 @@ let Constraints = "$src1 = $dst" in {
 multiclass sse12_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                        string asm, string mem, X86FoldableSchedWrite sched,
+                       Domain d,
                        SchedRead Int2Fpu = ReadDefault> {
+  let ExeDomain = d in {
   def rr : SI,
@@ -832,6 +834,7 @@ multiclass sse12_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC,
                 mem#"\t{$src, $dst|$dst, $src}",
                 [(set
DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, Sched<[sched.Folded]>; + } } multiclass sse12_cvt_p opc, RegisterClass RC, X86MemOperand x86memop, @@ -851,8 +854,8 @@ let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { multiclass sse12_vcvt_avx opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm, string mem, - X86FoldableSchedWrite sched> { -let hasSideEffects = 0, Predicates = [UseAVX] in { + X86FoldableSchedWrite sched, Domain d> { +let hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in { def rr : SI, Sched<[sched, ReadDefault, ReadInt2Fpu]>; @@ -867,19 +870,19 @@ let hasSideEffects = 0, Predicates = [UseAVX] in { let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, + WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, + WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG; defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, + WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, + WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG; } @@ -889,13 +892,17 @@ defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, // where appropriate to do so. let isCodeGenOnly = 1 in { defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l", - WriteCvtI2SS>, XS, VEX_4V, VEX_LIG, SIMD_EXC; + WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V, + VEX_LIG, SIMD_EXC; defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q", - WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC; + WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V, + VEX_W, VEX_LIG, SIMD_EXC; defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l", - WriteCvtI2SD>, XD, VEX_4V, VEX_LIG; + WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V, + VEX_LIG; defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q", - WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC; + WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V, + VEX_W, VEX_LIG, SIMD_EXC; } // isCodeGenOnly = 1 let Predicates = [UseAVX] in { @@ -921,28 +928,28 @@ let Predicates = [UseAVX] in { let isCodeGenOnly = 1 in { defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, XS, SIMD_EXC; + WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, XS, REX_W, SIMD_EXC; + WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC; defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, XD, SIMD_EXC; + WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC; defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, XD, REX_W, SIMD_EXC; + WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, "cvtsi2ss", "cvtsi2ss{l}", - WriteCvtI2SS, ReadInt2Fpu>, XS, SIMD_EXC; + WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC; defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, 
sint_to_fp, i64mem, loadi64, "cvtsi2ss", "cvtsi2ss{q}", - WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W, SIMD_EXC; + WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC; defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, "cvtsi2sd", "cvtsi2sd{l}", - WriteCvtI2SD, ReadInt2Fpu>, XD; + WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD; defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, "cvtsi2sd", "cvtsi2sd{q}", - WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W, SIMD_EXC; + WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC; } // isCodeGenOnly = 1 // Conversion Instructions Intrinsics - Match intrinsics which expect MM @@ -951,7 +958,8 @@ defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, multiclass sse12_cvt_sint opc, RegisterClass SrcRC, RegisterClass DstRC, ValueType DstVT, ValueType SrcVT, SDNode OpNode, Operand memop, ComplexPattern mem_cpat, string asm, - X86FoldableSchedWrite sched> { + X86FoldableSchedWrite sched, Domain d> { +let ExeDomain = d in { def rr_Int : SI, @@ -961,12 +969,13 @@ multiclass sse12_cvt_sint opc, RegisterClass SrcRC, RegisterClass DstRC, [(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>, Sched<[sched.Folded]>; } +} multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm, string mem, X86FoldableSchedWrite sched, - bit Is2Addr = 1> { -let hasSideEffects = 0 in { + Domain d, bit Is2Addr = 1> { +let hasSideEffects = 0, ExeDomain = d in { def rr_Int : SI, XD, VEX, VEX_LIG; + WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", - WriteCvtSD2I>, XD, VEX, VEX_W, VEX_LIG; + WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG; } defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si, - sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD; + sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I, + SSEPackedDouble>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si, - sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W; + sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I, + SSEPackedDouble>, XD, REX_W; } let Predicates = [UseAVX] in { defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, SIMD_EXC; + i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>, + XS, VEX_4V, VEX_LIG, SIMD_EXC; defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; + i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>, + XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG; + i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>, + XD, VEX_4V, VEX_LIG; defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; + i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>, + XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; } let Constraints = "$src1 = $dst" in { defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS, SIMD_EXC; + i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>, + XS, SIMD_EXC; defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W, 
SIMD_EXC; + i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>, + XS, REX_W, SIMD_EXC; defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD; + i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>, + XD; defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W, SIMD_EXC; + i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>, + XD, REX_W, SIMD_EXC; } def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1052,32 +1071,34 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", - WriteCvtSS2I>, XS, VEX, VEX_LIG; + WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, X86cvtts2Int, ssmem, sse_load_f32, - "cvttss2si", WriteCvtSS2I>, + "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG, VEX_W; defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, sdmem, sse_load_f64, "cvttsd2si", - WriteCvtSS2I>, XD, VEX, VEX_LIG; + WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, X86cvtts2Int, sdmem, sse_load_f64, - "cvttsd2si", WriteCvtSS2I>, + "cvttsd2si", WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG, VEX_W; } let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", - WriteCvtSS2I>, XS; + WriteCvtSS2I, SSEPackedSingle>, XS; defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, X86cvtts2Int, ssmem, sse_load_f32, - "cvttss2si", WriteCvtSS2I>, XS, REX_W; + "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, + XS, REX_W; defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, sdmem, sse_load_f64, "cvttsd2si", - WriteCvtSD2I>, XD; + WriteCvtSD2I, SSEPackedDouble>, XD; defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, X86cvtts2Int, sdmem, sse_load_f64, - "cvttsd2si", WriteCvtSD2I>, XD, REX_W; + "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>, + XD, REX_W; } def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", @@ -1117,18 +1138,18 @@ def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I>, XS, VEX, VEX_LIG; + WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG; + WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG; } let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I>, XS; + WriteCvtSS2I, SSEPackedSingle>, XS; defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I>, XS, REX_W; + WriteCvtSS2I, SSEPackedSingle>, XS, REX_W; defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load, "vcvtdq2ps\t{$src, $dst|$dst, $src}", @@ -1815,9 +1836,10 @@ let Constraints = "$src1 = $dst" in { // sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS multiclass 
sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, ValueType vt, X86MemOperand x86memop, - PatFrag ld_frag, string OpcodeStr, - X86FoldableSchedWrite sched> { -let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { + PatFrag ld_frag, string OpcodeStr, Domain d, + X86FoldableSchedWrite sched = WriteFCom> { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1, + ExeDomain = d in { def rr: SI, @@ -1835,8 +1857,9 @@ let mayLoad = 1 in multiclass sse12_ord_cmp_int opc, RegisterClass RC, SDNode OpNode, ValueType vt, Operand memop, ComplexPattern mem_cpat, string OpcodeStr, - X86FoldableSchedWrite sched> { -let Uses = [MXCSR], mayRaiseFPException = 1 in { + Domain d, + X86FoldableSchedWrite sched = WriteFCom> { +let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = d in { def rr_Int: SI, @@ -1852,49 +1875,49 @@ let mayLoad = 1 in let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; + "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; + "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; let Pattern = [] in { defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; + "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; + "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; } let isCodeGenOnly = 1 in { defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; + sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; + sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; + sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; + sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", WriteFCom>, PS; + "ucomiss", SSEPackedSingle>, PS; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFCom>, PD; + "ucomisd", SSEPackedDouble>, PD; let Pattern = [] in { defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss", WriteFCom>, PS; + "comiss", SSEPackedSingle>, PS; defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd", WriteFCom>, PD; + "comisd", SSEPackedDouble>, PD; } let isCodeGenOnly = 1 in { defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFCom>, PS; + sse_load_f32, "ucomiss", SSEPackedSingle>, PS; defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", WriteFCom>, PD; + sse_load_f64, "ucomisd", SSEPackedDouble>, PD; defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFCom>, PS; + sse_load_f32, "comiss", SSEPackedSingle>, 
PS; defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFCom>, PD; + sse_load_f64, "comisd", SSEPackedDouble>, PD; } } // Defs = [EFLAGS] @@ -5519,7 +5542,7 @@ let ExeDomain = SSEPackedDouble in { // FP round - roundss, roundps, roundsd, roundpd let Predicates = [HasAVX, NoVLX] in { - let ExeDomain = SSEPackedSingle in { + let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in { // Intrinsic form defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32, loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>, @@ -5529,7 +5552,7 @@ let Predicates = [HasAVX, NoVLX] in { VEX, VEX_L, VEX_WIG; } - let ExeDomain = SSEPackedDouble in { + let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in { defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64, loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>, VEX, VEX_WIG; @@ -5541,9 +5564,9 @@ let Predicates = [HasAVX, NoVLX] in { let Predicates = [UseAVX] in { defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl, v4f32, v2f64, X86RndScales, 0>, - VEX_4V, VEX_LIG, VEX_WIG; + VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC; defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>, - VEX_4V, VEX_LIG, VEX_WIG; + VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC; } let Predicates = [UseAVX] in { @@ -7303,12 +7326,12 @@ multiclass f16c_ps2ph; - defm VCVTPH2PSY : f16c_ph2ps, VEX_L; + defm VCVTPH2PS : f16c_ph2ps, SIMD_EXC; + defm VCVTPH2PSY : f16c_ph2ps, VEX_L, SIMD_EXC; defm VCVTPS2PH : f16c_ps2ph; + WriteCvtPS2PHSt>, SIMD_EXC; defm VCVTPS2PHY : f16c_ps2ph, VEX_L; + WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC; // Pattern match vcvtph2ps of a scalar i64 load. def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), diff --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp index c6da4b09dd60f..b19d1263e0c91 100644 --- a/llvm/lib/Target/X86/X86MacroFusion.cpp +++ b/llvm/lib/Target/X86/X86MacroFusion.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/X86BaseInfo.h" #include "X86MacroFusion.h" #include "X86Subtarget.h" #include "llvm/CodeGen/MacroFusion.h" @@ -18,160 +19,13 @@ using namespace llvm; -namespace { - -// The classification for the first instruction. -enum class FirstInstrKind { Test, Cmp, And, ALU, IncDec, Invalid }; - -// The classification for the second instruction (jump). -enum class JumpKind { - // JE, JL, JG and variants. - ELG, - // JA, JB and variants. - AB, - // JS, JP, JO and variants. - SPO, - // Not a fusable jump. 
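Stepping back from the X86InstrSSE.td hunks above: the changes thread an explicit `Domain` parameter into the scalar convert/compare multiclasses and tag the rounding and F16C conversions with MXCSR uses and `mayRaiseFPException` (`SIMD_EXC`). The following is a minimal illustrative model, not the LLVM API, of the two orthogonal pieces of metadata being attached; all names are borrowed from the diff for readability.

```cpp
// Illustrative sketch only: what ExeDomain plus the SIMD_EXC tag express.
enum ExeDomain { SSEPackedSingle, SSEPackedDouble, SSEPackedInt };

struct SSEInstModel {
  const char *Mnemonic;
  ExeDomain Domain;         // execution unit the uop runs in; mixing
                            // domains can cost bypass-delay cycles
  bool UsesMXCSR;           // reads rounding mode / exception masks
  bool MayRaiseFPException; // what the SIMD_EXC tag models for strict FP
};

// Single-precision converts land in the single domain, double-precision
// ones in the double domain, matching the defm changes above:
const SSEInstModel Models[] = {
    {"cvttss2si", SSEPackedSingle, true, true},
    {"cvttsd2si", SSEPackedDouble, true, true},
    {"ucomiss", SSEPackedSingle, true, true},
};
```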
- Invalid, -}; - -} // namespace - -static FirstInstrKind classifyFirst(const MachineInstr &MI) { - switch (MI.getOpcode()) { - default: - return FirstInstrKind::Invalid; - case X86::TEST8rr: - case X86::TEST16rr: - case X86::TEST32rr: - case X86::TEST64rr: - case X86::TEST8ri: - case X86::TEST16ri: - case X86::TEST32ri: - case X86::TEST64ri32: - case X86::TEST8mr: - case X86::TEST16mr: - case X86::TEST32mr: - case X86::TEST64mr: - return FirstInstrKind::Test; - case X86::AND16ri: - case X86::AND16ri8: - case X86::AND16rm: - case X86::AND16rr: - case X86::AND32ri: - case X86::AND32ri8: - case X86::AND32rm: - case X86::AND32rr: - case X86::AND64ri32: - case X86::AND64ri8: - case X86::AND64rm: - case X86::AND64rr: - case X86::AND8ri: - case X86::AND8rm: - case X86::AND8rr: - return FirstInstrKind::And; - case X86::CMP16ri: - case X86::CMP16ri8: - case X86::CMP16rm: - case X86::CMP16rr: - case X86::CMP16mr: - case X86::CMP32ri: - case X86::CMP32ri8: - case X86::CMP32rm: - case X86::CMP32rr: - case X86::CMP32mr: - case X86::CMP64ri32: - case X86::CMP64ri8: - case X86::CMP64rm: - case X86::CMP64rr: - case X86::CMP64mr: - case X86::CMP8ri: - case X86::CMP8rm: - case X86::CMP8rr: - case X86::CMP8mr: - return FirstInstrKind::Cmp; - case X86::ADD16ri: - case X86::ADD16ri8: - case X86::ADD16ri8_DB: - case X86::ADD16ri_DB: - case X86::ADD16rm: - case X86::ADD16rr: - case X86::ADD16rr_DB: - case X86::ADD32ri: - case X86::ADD32ri8: - case X86::ADD32ri8_DB: - case X86::ADD32ri_DB: - case X86::ADD32rm: - case X86::ADD32rr: - case X86::ADD32rr_DB: - case X86::ADD64ri32: - case X86::ADD64ri32_DB: - case X86::ADD64ri8: - case X86::ADD64ri8_DB: - case X86::ADD64rm: - case X86::ADD64rr: - case X86::ADD64rr_DB: - case X86::ADD8ri: - case X86::ADD8ri_DB: - case X86::ADD8rm: - case X86::ADD8rr: - case X86::ADD8rr_DB: - case X86::SUB16ri: - case X86::SUB16ri8: - case X86::SUB16rm: - case X86::SUB16rr: - case X86::SUB32ri: - case X86::SUB32ri8: - case X86::SUB32rm: - case X86::SUB32rr: - case X86::SUB64ri32: - case X86::SUB64ri8: - case X86::SUB64rm: - case X86::SUB64rr: - case X86::SUB8ri: - case X86::SUB8rm: - case X86::SUB8rr: - return FirstInstrKind::ALU; - case X86::INC16r: - case X86::INC32r: - case X86::INC64r: - case X86::INC8r: - case X86::DEC16r: - case X86::DEC32r: - case X86::DEC64r: - case X86::DEC8r: - return FirstInstrKind::IncDec; - } +static X86::FirstMacroFusionInstKind classifyFirst(const MachineInstr &MI) { + return X86::classifyFirstOpcodeInMacroFusion(MI.getOpcode()); } -static JumpKind classifySecond(const MachineInstr &MI) { +static X86::SecondMacroFusionInstKind classifySecond(const MachineInstr &MI) { X86::CondCode CC = X86::getCondFromBranch(MI); - if (CC == X86::COND_INVALID) - return JumpKind::Invalid; - - switch (CC) { - default: - return JumpKind::Invalid; - case X86::COND_E: - case X86::COND_NE: - case X86::COND_L: - case X86::COND_LE: - case X86::COND_G: - case X86::COND_GE: - return JumpKind::ELG; - case X86::COND_B: - case X86::COND_BE: - case X86::COND_A: - case X86::COND_AE: - return JumpKind::AB; - case X86::COND_S: - case X86::COND_NS: - case X86::COND_P: - case X86::COND_NP: - case X86::COND_O: - case X86::COND_NO: - return JumpKind::SPO; - } + return X86::classifySecondCondCodeInMacroFusion(CC); } /// Check if the instr pair, FirstMI and SecondMI, should be fused @@ -187,40 +41,27 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, if (!(ST.hasBranchFusion() || ST.hasMacroFusion())) return false; - const JumpKind BranchKind = classifySecond(SecondMI); + const 
X86::SecondMacroFusionInstKind BranchKind = classifySecond(SecondMI); - if (BranchKind == JumpKind::Invalid) + if (BranchKind == X86::SecondMacroFusionInstKind::Invalid) return false; // Second cannot be fused with anything. if (FirstMI == nullptr) return true; // We're only checking whether Second can be fused at all. - const FirstInstrKind TestKind = classifyFirst(*FirstMI); + const X86::FirstMacroFusionInstKind TestKind = classifyFirst(*FirstMI); if (ST.hasBranchFusion()) { // Branch fusion can merge CMP and TEST with all conditional jumps. - return (TestKind == FirstInstrKind::Cmp || - TestKind == FirstInstrKind::Test); + return (TestKind == X86::FirstMacroFusionInstKind::Cmp || + TestKind == X86::FirstMacroFusionInstKind::Test); } if (ST.hasMacroFusion()) { - // Macro Fusion rules are a bit more complex. See Agner Fog's - // Microarchitecture table 9.2 "Instruction Fusion". - switch (TestKind) { - case FirstInstrKind::Test: - case FirstInstrKind::And: - return true; - case FirstInstrKind::Cmp: - case FirstInstrKind::ALU: - return BranchKind == JumpKind::ELG || BranchKind == JumpKind::AB; - case FirstInstrKind::IncDec: - return BranchKind == JumpKind::ELG; - case FirstInstrKind::Invalid: - return false; - } + return X86::isMacroFused(TestKind, BranchKind); } - llvm_unreachable("unknown branch fusion type"); + llvm_unreachable("unknown fusion type"); } namespace llvm { diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 3809a14178fdf..f69626b2622e4 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -530,23 +530,20 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::MXCSR); // Set the stack-pointer register and its aliases as reserved. - for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid(); - ++I) - Reserved.set(*I); + for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP)) + Reserved.set(SubReg); // Set the Shadow Stack Pointer as reserved. Reserved.set(X86::SSP); // Set the instruction pointer register and its aliases as reserved. - for (MCSubRegIterator I(X86::RIP, this, /*IncludeSelf=*/true); I.isValid(); - ++I) - Reserved.set(*I); + for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP)) + Reserved.set(SubReg); // Set the frame-pointer register and its aliases as reserved if needed. if (TFI->hasFP(MF)) { - for (MCSubRegIterator I(X86::RBP, this, /*IncludeSelf=*/true); I.isValid(); - ++I) - Reserved.set(*I); + for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP)) + Reserved.set(SubReg); } // Set the base-pointer register and its aliases as reserved if needed. @@ -559,9 +556,8 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { "this calling convention."); Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64); - for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true); - I.isValid(); ++I) - Reserved.set(*I); + for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr)) + Reserved.set(SubReg); } // Mark the segment registers as reserved. diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td index f26c2d4c4a287..3cfaf714e93e8 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -295,8 +295,8 @@ def FPSW : X86Reg<"fpsr", 0>; def FPCW : X86Reg<"fpcr", 0>; // SIMD Floating-point control register. -// Note: We only model the current rounding modes and the IEEE masks. 
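For the X86MacroFusion.cpp hunk above: the local enums and opcode switches are replaced by the shared helpers behind `X86::classifyFirstOpcodeInMacroFusion`, `X86::classifySecondCondCodeInMacroFusion` and `X86::isMacroFused`. A hedged sketch of the fusion table the removed switch encoded (after Agner Fog's microarchitecture tables), using stand-in enums rather than the real LLVM types:

```cpp
enum class FirstKind { Test, Cmp, And, ALU, IncDec, Invalid };
enum class JumpKind { ELG, AB, SPO, Invalid };

static bool isMacroFusedSketch(FirstKind F, JumpKind J) {
  if (J == JumpKind::Invalid)
    return false;
  switch (F) {
  case FirstKind::Test:   // TEST fuses with every conditional jump,
  case FirstKind::And:    // and so does AND.
    return true;
  case FirstKind::Cmp:    // CMP/ADD/SUB fuse with equality/signed (ELG)
  case FirstKind::ALU:    // and unsigned (AB) jumps, but not JS/JP/JO.
    return J == JumpKind::ELG || J == JumpKind::AB;
  case FirstKind::IncDec: // INC/DEC only fuse with ELG jumps.
    return J == JumpKind::ELG;
  case FirstKind::Invalid:
    return false;
  }
  return false;
}
```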
-// IEEE flags, FTZ and DAZ are not modeled here. +// Note: We only model the "Uses" of the control bits: current rounding modes, +// DAZ, FTZ and exception masks. We don't model the "Defs" of flag bits. def MXCSR : X86Reg<"mxcsr", 0>; // Status flags register. diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0b3a5319baac3..f64fedd8cbb6a 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2377,6 +2377,13 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, } int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { + static const CostTblEntry SLMCostTbl[] = { + { ISD::EXTRACT_VECTOR_ELT, MVT::i8, 4 }, + { ISD::EXTRACT_VECTOR_ELT, MVT::i16, 4 }, + { ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4 }, + { ISD::EXTRACT_VECTOR_ELT, MVT::i64, 7 } + }; + assert(Val->isVectorTy() && "This must be a vector type"); Type *ScalarType = Val->getScalarType(); @@ -2396,6 +2403,13 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { // Floating point scalars are already located in index #0. if (ScalarType->isFloatingPointTy() && Index == 0) return 0; + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Unexpected vector opcode"); + MVT MScalarTy = LT.second.getScalarType(); + if (ST->isSLM()) + if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy)) + return LT.first * Entry->Cost; } // Add to the base cost if we know that the extracted element of a vector is diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 44d73b5ad5c04..48da7e7bdd03c 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -308,15 +308,16 @@ static const Value *getPointerOperand(const Instruction *I) { return nullptr; } -static const Value *getBasePointerOfAccessPointerOperand(const Instruction *I, - int64_t &BytesOffset, - const DataLayout &DL) { +static const Value * +getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset, + const DataLayout &DL, + bool AllowNonInbounds = false) { const Value *Ptr = getPointerOperand(I); if (!Ptr) return nullptr; return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL, - /*AllowNonInbounds*/ false); + AllowNonInbounds); } ChangeStatus AbstractAttribute::update(Attributor &A) { @@ -1702,8 +1703,7 @@ static int64_t getKnownNonNullAndDerefBytesForUse( return 0; } if (auto *GEP = dyn_cast(I)) - if (GEP->hasAllZeroIndices() || - (GEP->isInBounds() && GEP->hasAllConstantIndices())) { + if (GEP->hasAllConstantIndices()) { TrackUse = true; return 0; } @@ -1718,6 +1718,18 @@ static int64_t getKnownNonNullAndDerefBytesForUse( return std::max(int64_t(0), DerefBytes); } } + + /// Corner case when an offset is 0. 
+ if (const Value *Base = getBasePointerOfAccessPointerOperand( + I, Offset, DL, /*AllowNonInbounds*/ true)) { + if (Offset == 0 && Base == &AssociatedValue && + getPointerOperand(I) == UseV) { + int64_t DerefBytes = + (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()); + IsNonNull |= !NullPointerIsDefined; + return std::max(int64_t(0), DerefBytes); + } + } if (const Value *Base = GetPointerBaseWithConstantOffset(UseV, Offset, DL, /*AllowNonInbounds*/ false)) { @@ -2949,16 +2961,48 @@ struct AADereferenceableImpl : AADereferenceable { const StateType &getState() const override { return *this; } /// } + /// Helper function for collecting accessed bytes in must-be-executed-context + void addAccessedBytesForUse(Attributor &A, const Use *U, + const Instruction *I) { + const Value *UseV = U->get(); + if (!UseV->getType()->isPointerTy()) + return; + + Type *PtrTy = UseV->getType(); + const DataLayout &DL = A.getDataLayout(); + int64_t Offset; + if (const Value *Base = getBasePointerOfAccessPointerOperand( + I, Offset, DL, /*AllowNonInbounds*/ true)) { + if (Base == &getAssociatedValue() && getPointerOperand(I) == UseV) { + uint64_t Size = DL.getTypeStoreSize(PtrTy->getPointerElementType()); + addAccessedBytes(Offset, Size); + } + } + return; + } + /// See AAFromMustBeExecutedContext bool followUse(Attributor &A, const Use *U, const Instruction *I) { bool IsNonNull = false; bool TrackUse = false; int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse( A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse); + + addAccessedBytesForUse(A, U, I); takeKnownDerefBytesMaximum(DerefBytes); return TrackUse; } + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Change = AADereferenceable::manifest(A); + if (isAssumedNonNull() && hasAttr(Attribute::DereferenceableOrNull)) { + removeAttrs({Attribute::DereferenceableOrNull}); + return ChangeStatus::CHANGED; + } + return Change; + } + void getDeducedAttributes(LLVMContext &Ctx, SmallVectorImpl &Attrs) const override { // TODO: Add *_globally support @@ -3119,6 +3163,20 @@ static unsigned int getKnownAlignForUse(Attributor &A, AbstractAttribute &QueryingAA, Value &AssociatedValue, const Use *U, const Instruction *I, bool &TrackUse) { + // We need to follow common pointer manipulation uses to the accesses they + // feed into. + if (isa(I)) { + TrackUse = true; + return 0; + } + if (auto *GEP = dyn_cast(I)) { + if (GEP->hasAllConstantIndices()) { + TrackUse = true; + return 0; + } + } + + unsigned Alignment = 0; if (ImmutableCallSite ICS = ImmutableCallSite(I)) { if (ICS.isBundleOperand(U) || ICS.isCallee(U)) return 0; @@ -3129,23 +3187,34 @@ static unsigned int getKnownAlignForUse(Attributor &A, // dependences here. auto &AlignAA = A.getAAFor(QueryingAA, IRP, /* TrackDependence */ false); - return AlignAA.getKnownAlign(); - } - - // We need to follow common pointer manipulation uses to the accesses they - // feed into. 
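The Attributor changes above (and the `getKnownAlignForUse` hunk that continues just below) both derive facts from a memory access at `Base + Offset`. A hedged sketch of the two deductions, with illustrative helper names that are not the LLVM API:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <numeric>

// Dereferenceability: roughly the rule the deref hunks apply. An access of
// Size bytes at Base + Offset shows Base is dereferenceable for at least
// Offset + Size bytes (clamped at zero for negative offsets).
int64_t derefBytesFromAccess(int64_t Offset, uint64_t Size) {
  return std::max<int64_t>(0, Offset + int64_t(Size));
}

// Alignment: if Base is Align-aligned, then Base + Offset = Align*Q + Offset,
// so the access is aligned to the largest power of two dividing
// gcd(|Offset|, Align). Since Align is a power of two, that gcd already is one.
uint32_t alignFromBase(uint32_t Align, int32_t Offset) {
  if (Align <= 1 || Offset == 0)
    return Align;
  uint32_t Mag = uint32_t(std::abs(int64_t(Offset)));
  uint32_t G = std::gcd(Mag, Align);
  return G & ~(G - 1); // largest power-of-two divisor of G
}

// e.g. alignFromBase(16, 4) == 4, alignFromBase(16, 6) == 2.
```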
- // TODO: Consider gep instruction - if (isa(I)) { - TrackUse = true; - return 0; + Alignment = AlignAA.getKnownAlign(); } + const Value *UseV = U->get(); if (auto *SI = dyn_cast(I)) - return SI->getAlignment(); + Alignment = SI->getAlignment(); else if (auto *LI = dyn_cast(I)) - return LI->getAlignment(); + Alignment = LI->getAlignment(); - return 0; + if (Alignment <= 1) + return 0; + + auto &DL = A.getDataLayout(); + int64_t Offset; + + if (const Value *Base = GetPointerBaseWithConstantOffset(UseV, Offset, DL)) { + if (Base == &AssociatedValue) { + // BasePointerAddr + Offset = Alignment * Q for some integer Q. + // So we can say that the maximum power of two which is a divisor of + // gcd(Offset, Alignment) is an alignment. + + uint32_t gcd = + greatestCommonDivisor(uint32_t(abs((int32_t)Offset)), Alignment); + Alignment = llvm::PowerOf2Floor(gcd); + } + } + + return Alignment; } struct AAAlignImpl : AAAlign { AAAlignImpl(const IRPosition &IRP) : AAAlign(IRP) {} diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index 9db079c7734e2..2bd3df3add7a0 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -328,6 +328,9 @@ Function *HotColdSplitting::extractColdRegion( } CI->setIsNoInline(); + if (OrigF->hasSection()) + OutF->setSection(OrigF->getSection()); + markFunctionCold(*OutF, BFI != nullptr); LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 23ca03ff68b0d..92e9a8814f8ff 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2279,6 +2279,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::copysign: { + const APFloat *C; + if (match(II->getArgOperand(1), m_APFloat(C))) { + // If we know the sign bit of the sign argument, reduce to FABS/FNABS: + // copysign X, PosC --> fabs X + // copysign X, NegC --> fneg (fabs X) + Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, + II->getArgOperand(0), II); + if (C->isNegative()) + Fabs = Builder.CreateFNegFMF(Fabs, II); + + return replaceInstUsesWith(*II, Fabs); + } + break; + } case Intrinsic::fabs: { Value *Cond; Constant *LHS, *RHS; @@ -3314,6 +3329,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (match(Arg, m_Intrinsic(m_Value(ArgArg))) && II->getType() == ArgArg->getType()) return replaceInstUsesWith(*II, ArgArg); + Constant *XorMask; + if (match(Arg, + m_Xor(m_Intrinsic(m_Value(ArgArg)), + m_Constant(XorMask))) && + II->getType() == ArgArg->getType()) { + if (auto *CI = dyn_cast(XorMask)) { + if (CI->getValue().trunc(16).isAllOnesValue()) { + auto TrueVector = Builder.CreateVectorSplat( + II->getType()->getVectorNumElements(), Builder.getTrue()); + return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector); + } + } + } KnownBits ScalarKnown(32); if (SimplifyDemandedBits(II, 0, APInt::getLowBitsSet(32, 16), ScalarKnown, 0)) @@ -3358,7 +3386,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (const ConstantFP *C = dyn_cast(Src)) { const APFloat &ArgVal = C->getValueAPF(); - APFloat Val(ArgVal.getSemantics(), 1.0); + APFloat Val(ArgVal.getSemantics(), 1); APFloat::opStatus Status = Val.divide(ArgVal, APFloat::rmNearestTiesToEven); // Only do this if it was exact and therefore not dependent on the diff --git 
a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 0390368c4bb40..2171c819fd9e2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/DIBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" +#include using namespace llvm; using namespace PatternMatch; @@ -1820,12 +1821,24 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { } /// This input value (which is known to have vector type) is being zero extended -/// or truncated to the specified vector type. +/// or truncated to the specified vector type. Since the zext/trunc is done +/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, +/// endianness will impact which end of the vector that is extended or +/// truncated. +/// +/// A vector is always stored with index 0 at the lowest address, which +/// corresponds to the most significant bits for a big endian stored integer and +/// the least significant bits for little endian. A trunc/zext of an integer +/// impacts the big end of the integer. Thus, we need to add/remove elements at +/// the front of the vector for big endian targets, and the back of the vector +/// for little endian targets. +/// /// Try to replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. -static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, - InstCombiner &IC) { +static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal, + VectorType *DestTy, + InstCombiner &IC) { // We can only do this optimization if the output is a multiple of the input // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. @@ -1844,31 +1857,53 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, InVal = IC.Builder.CreateBitCast(InVal, SrcTy); } + bool IsBigEndian = IC.getDataLayout().isBigEndian(); + unsigned SrcElts = SrcTy->getNumElements(); + unsigned DestElts = DestTy->getNumElements(); + + assert(SrcElts != DestElts && "Element counts should be different."); + // Now that the element types match, get the shuffle mask and RHS of the // shuffle to use, which depends on whether we're increasing or decreasing the // size of the input. - SmallVector ShuffleMask; + SmallVector ShuffleMaskStorage; + ArrayRef ShuffleMask; Value *V2; - if (SrcTy->getNumElements() > DestTy->getNumElements()) { - // If we're shrinking the number of elements, just shuffle in the low - // elements from the input and use undef as the second shuffle input. - V2 = UndefValue::get(SrcTy); - for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) - ShuffleMask.push_back(i); + // Produce an identify shuffle mask for the src vector. + ShuffleMaskStorage.resize(SrcElts); + std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0); + if (SrcElts > DestElts) { + // If we're shrinking the number of elements (rewriting an integer + // truncate), just shuffle in the elements corresponding to the least + // significant bits from the input and use undef as the second shuffle + // input. + V2 = UndefValue::get(SrcTy); + // Make sure the shuffle mask selects the "least significant bits" by + // keeping elements from back of the src vector for big endian, and from the + // front for little endian. 
+ ShuffleMask = ShuffleMaskStorage; + if (IsBigEndian) + ShuffleMask = ShuffleMask.take_back(DestElts); + else + ShuffleMask = ShuffleMask.take_front(DestElts); } else { - // If we're increasing the number of elements, shuffle in all of the - // elements from InVal and fill the rest of the result elements with zeros - // from a constant zero. + // If we're increasing the number of elements (rewriting an integer zext), + // shuffle in all of the elements from InVal. Fill the rest of the result + // elements with zeros from a constant zero. V2 = Constant::getNullValue(SrcTy); - unsigned SrcElts = SrcTy->getNumElements(); - for (unsigned i = 0, e = SrcElts; i != e; ++i) - ShuffleMask.push_back(i); - - // The excess elements reference the first element of the zero input. - for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) - ShuffleMask.push_back(SrcElts); + // Use first elt from V2 when indicating zero in the shuffle mask. + uint32_t NullElt = SrcElts; + // Extend with null values in the "most significant bits" by adding elements + // in front of the src vector for big endian, and at the back for little + // endian. + unsigned DeltaElts = DestElts - SrcElts; + if (IsBigEndian) + ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt); + else + ShuffleMaskStorage.append(DeltaElts, NullElt); + ShuffleMask = ShuffleMaskStorage; } return new ShuffleVectorInst(InVal, V2, @@ -2359,8 +2394,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } if (VectorType *DestVTy = dyn_cast(DestTy)) { - if (DestVTy->getNumElements() == 1 && - VectorType::isValidElementType(SrcTy)) { + if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); @@ -2375,8 +2409,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { CastInst *SrcCast = cast(Src); if (BitCastInst *BCIn = dyn_cast(SrcCast->getOperand(0))) if (isa(BCIn->getOperand(0)->getType())) - if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0), - cast(DestTy), *this)) + if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts( + BCIn->getOperand(0), cast(DestTy), *this)) return I; } @@ -2392,7 +2426,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (SrcVTy->getNumElements() == 1) { // If our destination is not a vector, then make this a straight // scalar-scalar cast. - if (VectorType::isValidElementType(DestTy)) { + if (!DestTy->isVectorTy()) { Value *Elem = Builder.CreateExtractElement(Src, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 5fb3ec8757133..071985eb64138 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2566,9 +2566,6 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, Type *Ty = Add->getType(); CmpInst::Predicate Pred = Cmp.getPredicate(); - if (!Add->hasOneUse()) - return nullptr; - // If the add does not wrap, we can always adjust the compare by subtracting // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE // are canonicalized to SGT/SLT/UGT/ULT. 
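To make the endianness reasoning in `optimizeVectorResizeWithIntegerBitCasts` above concrete, here is a standalone sketch of the mask construction (assumes SrcElts != DstElts, as the assert in the hunk requires; plain STL types stand in for LLVM's):

```cpp
#include <cstdint>
#include <numeric>
#include <vector>

// Identity mask over the source elements, then keep/extend at the end that
// corresponds to the integer's least/most significant bits.
std::vector<uint32_t> resizeMask(unsigned SrcElts, unsigned DstElts,
                                 bool BigEndian) {
  std::vector<uint32_t> M(SrcElts);
  std::iota(M.begin(), M.end(), 0);
  if (SrcElts > DstElts) {      // trunc: keep the low-bits end
    if (BigEndian)
      M.erase(M.begin(), M.end() - DstElts); // low bits live at the back
    else
      M.resize(DstElts);                     // low bits live at the front
  } else {                      // zext: pad the high-bits end with zeros
    uint32_t NullElt = SrcElts; // selects element 0 of the zero vector
    unsigned Delta = DstElts - SrcElts;
    if (BigEndian)
      M.insert(M.begin(), Delta, NullElt);
    else
      M.insert(M.end(), Delta, NullElt);
  }
  return M;
}

// e.g. resizeMask(2, 4, /*BigEndian=*/false) == {0, 1, 2, 2}: both source
// elements, then two zeros taken from the null vector's first element.
```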
@@ -2602,6 +2599,9 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower)); } + if (!Add->hasOneUse()) + return nullptr; + // X+C (X & -C2) == C // iff C & (C2-1) == 0 // C2 is a power of 2 diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 0b9128a9f5a1c..f7b39d98d4923 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1368,8 +1368,10 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { } // 1 urem X -> zext(X != 1) - if (match(Op0, m_One())) - return CastInst::CreateZExtOrBitCast(Builder.CreateICmpNE(Op1, Op0), Ty); + if (match(Op0, m_One())) { + Value *Cmp = Builder.CreateICmpNE(Op1, ConstantInt::get(Ty, 1)); + return CastInst::CreateZExtOrBitCast(Cmp, Ty); + } // X urem C -> X < C ? X : X - C, where C >= signbit. if (match(Op1, m_Negative())) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index bdfbd75d31a84..05a624fde86b6 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -704,16 +704,24 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI, assert((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) && "Unexpected isUnsigned predicate!"); - // Account for swapped form of subtraction: ((a > b) ? b - a : 0). + // Ensure the sub is of the form: + // (a > b) ? a - b : 0 -> usub.sat(a, b) + // (a > b) ? b - a : 0 -> -usub.sat(a, b) + // Checking for both a-b and a+(-b) as a constant. bool IsNegative = false; - if (match(TrueVal, m_Sub(m_Specific(B), m_Specific(A)))) + const APInt *C; + if (match(TrueVal, m_Sub(m_Specific(B), m_Specific(A))) || + (match(A, m_APInt(C)) && + match(TrueVal, m_Add(m_Specific(B), m_SpecificInt(-*C))))) IsNegative = true; - else if (!match(TrueVal, m_Sub(m_Specific(A), m_Specific(B)))) + else if (!match(TrueVal, m_Sub(m_Specific(A), m_Specific(B))) && + !(match(B, m_APInt(C)) && + match(TrueVal, m_Add(m_Specific(A), m_SpecificInt(-*C))))) return nullptr; - // If sub is used anywhere else, we wouldn't be able to eliminate it - // afterwards. - if (!TrueVal->hasOneUse()) + // If we are adding a negate and the sub and icmp are used anywhere else, we + // would end up with more instructions. + if (IsNegative && !TrueVal->hasOneUse() && !ICI->hasOneUse()) return nullptr; // (a > b) ? a - b : 0 -> usub.sat(a, b) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index d31cbc0882ee5..9fabe9def1104 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -435,13 +435,6 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { Worklist.AddValue(EE); return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } - - // If the input is a bitcast from x86_mmx, turn into a single bitcast from - // the mmx type to the scalar type. 
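On the `canonicalizeSaturatedSubtract` hunk above: the extra `m_Add(..., m_SpecificInt(-*C))` patterns are needed because `a - C` is canonicalized to `a + (-C)` when the operand is a constant. The scalar semantics of the fold itself, for reference:

```cpp
#include <cstdint>

// (a > b) ? a - b : 0 is exactly unsigned saturating subtraction.
uint32_t usub_sat(uint32_t A, uint32_t B) {
  return A > B ? A - B : 0;
}

// The swapped form (a > b) ? b - a : 0 wraps to -(a - b), i.e. the negated
// saturating subtraction the transform emits.
uint32_t usub_sat_neg(uint32_t A, uint32_t B) {
  return uint32_t(0) - usub_sat(A, B);
}
```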
- if (CI->getOpcode() == Instruction::BitCast && - EI.getVectorOperandType()->getNumElements() == 1 && - CI->getOperand(0)->getType()->isX86_MMXTy()) - return new BitCastInst(CI->getOperand(0), EI.getType()); } } return nullptr; diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 831fdedfc5e55..c7e708127a41f 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2996,7 +2996,6 @@ void FunctionStackPoisoner::processStaticAllocas() { Instruction *InsBefore = AllocaVec[0]; IRBuilder<> IRB(InsBefore); - IRB.SetCurrentDebugLocation(EntryDebugLocation); // Make sure non-instrumented allocas stay in the entry block. Otherwise, // debug info is broken, because only entry-block allocas are treated as @@ -3091,14 +3090,12 @@ void FunctionStackPoisoner::processStaticAllocas() { Instruction *Term = SplitBlockAndInsertIfThen(UseAfterReturnIsEnabled, InsBefore, false); IRBuilder<> IRBIf(Term); - IRBIf.SetCurrentDebugLocation(EntryDebugLocation); StackMallocIdx = StackMallocSizeClass(LocalStackSize); assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass); Value *FakeStackValue = IRBIf.CreateCall(AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize)); IRB.SetInsertPoint(InsBefore); - IRB.SetCurrentDebugLocation(EntryDebugLocation); FakeStack = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, Term, ConstantInt::get(IntptrTy, 0)); @@ -3106,14 +3103,11 @@ void FunctionStackPoisoner::processStaticAllocas() { IRB.CreateICmpEQ(FakeStack, Constant::getNullValue(IntptrTy)); Term = SplitBlockAndInsertIfThen(NoFakeStack, InsBefore, false); IRBIf.SetInsertPoint(Term); - IRBIf.SetCurrentDebugLocation(EntryDebugLocation); Value *AllocaValue = DoDynamicAlloca ? createAllocaForLayout(IRBIf, L, true) : StaticAlloca; IRB.SetInsertPoint(InsBefore); - IRB.SetCurrentDebugLocation(EntryDebugLocation); LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack); - IRB.SetCurrentDebugLocation(EntryDebugLocation); IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca); DIExprFlags |= DIExpression::DerefBefore; } else { diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index dbe49cbc03c23..21077a52c154c 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -553,7 +553,8 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S, unsigned NumUses = 0; bool OptForSize = Entry->getParent()->hasOptSize() || - llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI); + llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI, + PGSOQueryType::IRPass); if (!OptForSize || std::distance(S,E) > 100) { for (auto ConstCand = S; ConstCand != E; ++ConstCand) { NumUses += ConstCand->Uses.size(); diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 2c4937b6bef21..6ce2d06058cf3 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -717,22 +717,6 @@ bool LoopInterchangeLegality::findInductionAndReductions( return true; } -static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) { - for (PHINode &PHI : Block->phis()) { - // Reduction lcssa phi will have only 1 incoming block that from loop latch. 
- if (PHI.getNumIncomingValues() > 1) - return false; - Instruction *Ins = dyn_cast(PHI.getIncomingValue(0)); - if (!Ins) - return false; - // Incoming value for lcssa phi's in outer loop exit can only be inner loop - // exits lcssa phi else it would not be tightly nested. - if (!isa(Ins) && isOuterLoopExitBlock) - return false; - } - return true; -} - // This function indicates the current limitations in the transform as a result // of which we do not proceed. bool LoopInterchangeLegality::currentLimitations() { @@ -831,21 +815,6 @@ bool LoopInterchangeLegality::currentLimitations() { return true; } - // TODO: We only handle LCSSA PHI's corresponding to reduction for now. - BasicBlock *InnerExit = InnerLoop->getExitBlock(); - if (!containsSafePHI(InnerExit, false)) { - LLVM_DEBUG( - dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with LCSSA PHIs can be interchange " - "currently."; - }); - return true; - } - // TODO: Current limitation: Since we split the inner loop latch at the point // were induction variable is incremented (induction.next); We cannot have // more than 1 user of induction.next since it would result in broken code @@ -921,6 +890,28 @@ bool LoopInterchangeLegality::currentLimitations() { return false; } +// We currently only support LCSSA PHI nodes in the inner loop exit, if their +// users are either reduction PHIs or PHIs outside the outer loop (which means +// the we are only interested in the final value after the loop). +static bool +areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, + SmallPtrSetImpl &Reductions) { + BasicBlock *InnerExit = OuterL->getUniqueExitBlock(); + for (PHINode &PHI : InnerExit->phis()) { + // Reduction lcssa phi will have only 1 incoming block that from loop latch. + if (PHI.getNumIncomingValues() > 1) + return false; + if (any_of(PHI.users(), [&Reductions, OuterL](User *U) { + PHINode *PN = dyn_cast(U); + return !PN || (Reductions.find(PN) == Reductions.end() && + OuterL->contains(PN->getParent())); + })) { + return false; + } + } + return true; +} + // We currently support LCSSA PHI nodes in the outer loop exit, if their // incoming values do not come from the outer loop latch or if the // outer loop latch has a single predecessor. In that case, the value will @@ -928,7 +919,7 @@ bool LoopInterchangeLegality::currentLimitations() { // will still be true after interchanging. If we have multiple predecessor, // that may not be the case, e.g. because the outer loop latch may be executed // if the inner loop is not executed. 
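As context for the LCSSA PHI legality checks being reworked above, a hypothetical loop nest of the shape LoopInterchange targets; the only value live out of the inner loop is the reduction `Sum`, so the inner exit's LCSSA PHIs are exactly the reduction PHIs the new `areInnerLoopExitPHIsSupported` accepts:

```cpp
// Cache-hostile original: the inner loop strides by M through A.
int sumColumnsFirst(const int *A, int N, int M) {
  int Sum = 0;
  for (int J = 0; J < M; ++J)   // outer
    for (int I = 0; I < N; ++I) // inner
      Sum += A[I * M + J];
  return Sum;
}

// Interchanged form: unit-stride accesses in the innermost loop.
int sumRowsFirst(const int *A, int N, int M) {
  int Sum = 0;
  for (int I = 0; I < N; ++I)
    for (int J = 0; J < M; ++J)
      Sum += A[I * M + J];
  return Sum;
}
```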
-static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { +static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock(); for (PHINode &PHI : LoopNestExit->phis()) { // FIXME: We currently are not able to detect floating point reductions @@ -1013,7 +1004,19 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, return false; } - if (!areLoopExitPHIsSupported(OuterLoop, InnerLoop)) { + if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop, + OuterInnerReductions)) { + LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Found unsupported PHI node in loop exit."; + }); + return false; + } + + if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) { LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"); ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI", diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index 5b822b6b81807..598a85e5b9471 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -545,7 +545,8 @@ class LoadEliminationForLoop { auto *HeaderBB = L->getHeader(); auto *F = HeaderBB->getParent(); bool OptForSize = F->hasOptSize() || - llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI); + llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI, + PGSOQueryType::IRPass); if (OptForSize) { LLVM_DEBUG( dbgs() << "Versioning is needed but not allowed when optimizing " diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index bb314310cfa56..4c2b079c6bb5b 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -213,7 +213,8 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( // Apply size attributes bool OptForSize = L->getHeader()->getParent()->hasOptSize() || - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI); + llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, + PGSOQueryType::IRPass); if (OptForSize) { UP.Threshold = UP.OptSizeThreshold; UP.PartialThreshold = UP.PartialOptSizeThreshold; diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 13e44765985f4..d441c6bbf124b 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -331,6 +331,20 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, } } +// Return the top-most loop containing ExitBB and having ExitBB as exiting block +// or the loop containing ExitBB, if there is no parent loop containing ExitBB +// as exiting block. +static Loop *getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI) { + Loop *TopMost = LI.getLoopFor(ExitBB); + Loop *Current = TopMost; + while (Current) { + if (Current->isLoopExiting(ExitBB)) + TopMost = Current; + Current = Current->getParentLoop(); + } + return TopMost; +} + /// Unswitch a trivial branch if the condition is loop invariant. 
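On `getTopMostExitingLoop` above: forgetting only the innermost loop around the exit block can leave stale SCEV trip counts for outer loops that also exit through the same block, so the helper walks up as far as the exit edge reaches. A minimal model with stand-in types (not the LLVM classes):

```cpp
#include <set>

struct Block {};
struct Loop {
  Loop *Parent = nullptr;
  std::set<const Block *> Exiting;
  bool isLoopExiting(const Block *B) const { return Exiting.count(B) != 0; }
};

// Highest ancestor of Innermost (the loop containing ExitBB) that still
// exits through ExitBB.
Loop *topMostExitingLoop(Loop *Innermost, const Block *ExitBB) {
  Loop *TopMost = Innermost;
  for (Loop *Cur = Innermost; Cur; Cur = Cur->Parent)
    if (Cur->isLoopExiting(ExitBB))
      TopMost = Cur;
  return TopMost;
}
```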
/// /// This routine should only be called when loop code leading to the branch has @@ -415,9 +429,10 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, }); // If we have scalar evolutions, we need to invalidate them including this - // loop and the loop containing the exit block. + // loop, the loop containing the exit block and the topmost parent loop + // exiting via LoopExitBB. if (SE) { - if (Loop *ExitL = LI.getLoopFor(LoopExitBB)) + if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI)) SE->forgetLoop(ExitL); else // Forget the entire nest as this exits the entire nest. diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 71aa585dfe5d6..26d48ee0d23fa 100644 --- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -258,7 +258,7 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { // references in IR module (not in combined index), so we can // ignore them when computing import. We do not export references // of writeonly object. See computeImportForReferencedGlobals - if (ImportIndex.isWriteOnly(GVS) && GVS->refs().size()) + if (ImportIndex.isWriteOnly(GVS)) V->setInitializer(Constant::getNullValue(V->getValueType())); } } diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 18a17119b47fd..44513b1f68275 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1119,6 +1119,45 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { return CI->getArgOperand(0); } +Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilder<> &B) { + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + ConstantInt *StopChar = dyn_cast(CI->getArgOperand(2)); + ConstantInt *N = dyn_cast(CI->getArgOperand(3)); + StringRef SrcStr; + if (CI->use_empty() && Dst == Src) + return Dst; + // memccpy(d, s, c, 0) -> nullptr + if (N) { + if (N->isNullValue()) + return Constant::getNullValue(CI->getType()); + if (!getConstantStringInfo(Src, SrcStr, /*Offset=*/0, + /*TrimAtNul=*/false) || + !StopChar) + return nullptr; + } else { + return nullptr; + } + + // Wrap arg 'c' of type int to char + size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF); + if (Pos == StringRef::npos) { + if (N->getZExtValue() <= SrcStr.size()) { + B.CreateMemCpy(Dst, 1, Src, 1, CI->getArgOperand(3)); + return Constant::getNullValue(CI->getType()); + } + return nullptr; + } + + Value *NewN = + ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue())); + // memccpy -> llvm.memcpy + B.CreateMemCpy(Dst, 1, Src, 1, NewN); + return Pos + 1 <= N->getZExtValue() + ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN) + : Constant::getNullValue(CI->getType()); +} + Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) { Value *Dst = CI->getArgOperand(0); Value *N = CI->getArgOperand(2); @@ -1696,7 +1735,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { // TODO: This whole transformation should be backend specific (e.g. some // backends might prefer libcalls or the limit for the exponent might // be different) and it should also consider optimizing for size. 
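For the new `optimizeMemCCpy` above, the C library semantics it folds away, written out as a reference implementation: copy up to and including the first occurrence of the stop character, or N bytes if it never occurs, returning one past the stop byte in the destination or null.

```cpp
#include <cstddef>

void *memccpy_ref(void *Dst, const void *Src, int C, size_t N) {
  const unsigned char *S = static_cast<const unsigned char *>(Src);
  unsigned char *D = static_cast<unsigned char *>(Dst);
  for (size_t I = 0; I < N; ++I) {
    D[I] = S[I];
    if (S[I] == (unsigned char)C)
      return D + I + 1;
  }
  return nullptr; // stop character not found in the first N bytes
}
```

When the source contents and stop character are compile-time constants, the position of the stop byte is known, so the simplifier can emit a plain `llvm.memcpy` of `min(Pos + 1, N)` bytes and a constant GEP or null result, exactly as the hunk does.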
- APFloat LimF(ExpoF->getSemantics(), 33.0), + APFloat LimF(ExpoF->getSemantics(), 33), ExpoA(abs(*ExpoF)); if (ExpoA.compare(LimF) == APFloat::cmpLessThan) { // This transformation applies to integer or integer+0.5 exponents only. @@ -2716,7 +2755,8 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { // Don't rewrite fputs to fwrite when optimising for size because fwrite // requires more arguments and thus extra MOVs are required. bool OptForSize = CI->getFunction()->hasOptSize() || - llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + PGSOQueryType::IRPass); if (OptForSize) return nullptr; @@ -2864,6 +2904,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeMemCmp(CI, Builder); case LibFunc_memcpy: return optimizeMemCpy(CI, Builder); + case LibFunc_memccpy: + return optimizeMemCCpy(CI, Builder); case LibFunc_mempcpy: return optimizeMemPCpy(CI, Builder); case LibFunc_memmove: diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp index f1200471cb4f3..cab375225e892 100644 --- a/llvm/lib/Transforms/Utils/SizeOpts.cpp +++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp @@ -28,6 +28,11 @@ cl::opt PGSOColdCodeOnly( cl::desc("Apply the profile guided size optimizations only " "to cold code.")); +cl::opt PGSOIRPassOrTestOnly( + "pgso-ir-pass-or-test-only", cl::Hidden, cl::init(true), + cl::desc("Apply the profile guided size optimizations only" + "to the IR passes or tests.")); + cl::opt ForcePGSO( "force-pgso", cl::Hidden, cl::init(false), cl::desc("Force the (profiled-guided) size optimizations. ")); @@ -70,11 +75,15 @@ struct BasicBlockBFIAdapter { } // end anonymous namespace bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI) { - return shouldFuncOptimizeForSizeImpl(F, PSI, BFI); + BlockFrequencyInfo *BFI, + PGSOQueryType QueryType) { + return shouldFuncOptimizeForSizeImpl(F, PSI, BFI, + QueryType); } bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI) { - return shouldOptimizeForSizeImpl(BB, PSI, BFI); + BlockFrequencyInfo *BFI, + PGSOQueryType QueryType) { + return shouldOptimizeForSizeImpl(BB, PSI, BFI, + QueryType); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index fcd8b05b88301..f614c3a29e558 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4668,14 +4668,26 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { SetVector Worklist; BasicBlock *Latch = TheLoop->getLoopLatch(); + // Instructions that are scalar with predication must not be considered + // uniform after vectorization, because that would create an erroneous + // replicating region where only a single instance out of VF should be formed. + // TODO: optimize such seldom cases if found important, see PR40816. + auto addToWorklistIfAllowed = [&](Instruction *I) -> void { + if (isScalarWithPredication(I, VF)) { + LLVM_DEBUG(dbgs() << "LV: Found not uniform being ScalarWithPredication: " + << *I << "\n"); + return; + } + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *I << "\n"); + Worklist.insert(I); + }; + // Start with the conditional branch. If the branch condition is an // instruction contained in the loop that is only used by the branch, it is // uniform. 
auto *Cmp = dyn_cast(Latch->getTerminator()->getOperand(0)); - if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) { - Worklist.insert(Cmp); - LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n"); - } + if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) + addToWorklistIfAllowed(Cmp); // Holds consecutive and consecutive-like pointers. Consecutive-like pointers // are pointers that are treated like consecutive pointers during @@ -4734,10 +4746,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // Add to the Worklist all consecutive and consecutive-like pointers that // aren't also identified as possibly non-uniform. for (auto *V : ConsecutiveLikePtrs) - if (PossibleNonUniformPtrs.find(V) == PossibleNonUniformPtrs.end()) { - LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n"); - Worklist.insert(V); - } + if (PossibleNonUniformPtrs.find(V) == PossibleNonUniformPtrs.end()) + addToWorklistIfAllowed(V); // Expand Worklist in topological order: whenever a new instruction // is added , its users should be already inside Worklist. It ensures @@ -4763,10 +4773,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { return Worklist.count(J) || (OI == getLoadStorePointerOperand(J) && isUniformDecision(J, VF)); - })) { - Worklist.insert(OI); - LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n"); - } + })) + addToWorklistIfAllowed(OI); } } @@ -4808,11 +4816,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { continue; // The induction variable and its update instruction will remain uniform. - Worklist.insert(Ind); - Worklist.insert(IndUpdate); - LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ind << "\n"); - LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate - << "\n"); + addToWorklistIfAllowed(Ind); + addToWorklistIfAllowed(IndUpdate); } Uniforms[VF].insert(Worklist.begin(), Worklist.end()); @@ -7434,7 +7439,8 @@ getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints, if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && (F->hasOptSize() || - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI))) + llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, + PGSOQueryType::IRPass))) SEL = CM_ScalarEpilogueNotAllowedOptSize; else if (PreferPredicateOverEpilog || Hints.getPredicate() == LoopVectorizeHints::FK_Enabled || diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e4f50a8787df1..949988415a44c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -629,9 +629,10 @@ class BoUpSLP { return MinVecRegSize; } - /// Check if ArrayType or StructType is isomorphic to some VectorType. - /// Accepts homogeneous aggregate of vectors like - /// { <2 x float>, <2 x float> } + /// Check if homogeneous aggregate is isomorphic to some VectorType. + /// Accepts homogeneous multidimensional aggregate of scalars/vectors like + /// {[4 x i16], [4 x i16]}, { <2 x float>, <2 x float> }, + /// {{{i16, i16}, {i16, i16}}, {{i16, i16}, {i16, i16}}} and so on. /// /// \returns number of elements in vector if isomorphism exists, 0 otherwise. 
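The `addToWorklistIfAllowed` refactor in the LoopVectorize hunk above funnels every uniform-candidate insertion through one guard, so no call site can forget the scalar-with-predication check. A sketch of the pattern with stand-in types:

```cpp
#include <set>
#include <string>

struct Instr { std::string Name; bool ScalarWithPredication = false; };

void collectUniforms(const std::set<Instr *> &Candidates,
                     std::set<Instr *> &Worklist) {
  auto AddIfAllowed = [&Worklist](Instr *I) {
    // A predicated scalar runs only for the active lanes, so treating it
    // as uniform (one copy shared by all VF lanes) would be wrong.
    if (I->ScalarWithPredication)
      return;
    Worklist.insert(I);
  };
  for (Instr *I : Candidates)
    AddIfAllowed(I);
}
```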
unsigned canMapToVector(Type *T, const DataLayout &DL) const; @@ -3088,20 +3089,22 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const { - unsigned N; - Type *EltTy; - auto *ST = dyn_cast(T); - if (ST) { - N = ST->getNumElements(); - EltTy = *ST->element_begin(); - } else { - N = cast(T)->getNumElements(); - EltTy = cast(T)->getElementType(); - } - - if (auto *VT = dyn_cast(EltTy)) { - EltTy = VT->getElementType(); - N *= VT->getNumElements(); + unsigned N = 1; + Type *EltTy = T; + + while (isa(EltTy)) { + if (auto *ST = dyn_cast(EltTy)) { + // Check that struct is homogeneous. + for (const auto *Ty : ST->elements()) + if (Ty != *ST->element_begin()) + return 0; + N *= ST->getNumElements(); + EltTy = *ST->element_begin(); + } else { + auto *SeqT = cast(EltTy); + N *= SeqT->getNumElements(); + EltTy = SeqT->getElementType(); + } } if (!isValidElementType(EltTy)) @@ -3109,12 +3112,6 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const { uint64_t VTSize = DL.getTypeStoreSizeInBits(VectorType::get(EltTy, N)); if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T)) return 0; - if (ST) { - // Check that struct is homogeneous. - for (const auto *Ty : ST->elements()) - if (Ty != *ST->element_begin()) - return 0; - } return N; } @@ -6940,57 +6937,54 @@ class HorizontalReduction { /// %rb = insertelement <4 x float> %ra, float %s1, i32 1 /// %rc = insertelement <4 x float> %rb, float %s2, i32 2 /// %rd = insertelement <4 x float> %rc, float %s3, i32 3 -/// starting from the last insertelement instruction. +/// starting from the last insertelement or insertvalue instruction. /// -/// Returns true if it matches -static bool findBuildVector(InsertElementInst *LastInsertElem, - TargetTransformInfo *TTI, - SmallVectorImpl &BuildVectorOpds, - int &UserCost) { - UserCost = 0; - Value *V = nullptr; - do { - if (auto *CI = dyn_cast(LastInsertElem->getOperand(2))) { - UserCost += TTI->getVectorInstrCost(Instruction::InsertElement, - LastInsertElem->getType(), - CI->getZExtValue()); - } - BuildVectorOpds.push_back(LastInsertElem->getOperand(1)); - V = LastInsertElem->getOperand(0); - if (isa(V)) - break; - LastInsertElem = dyn_cast(V); - if (!LastInsertElem || !LastInsertElem->hasOneUse()) - return false; - } while (true); - std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); - return true; -} - -/// Like findBuildVector, but looks for construction of aggregate. -/// Accepts homegeneous aggregate of vectors like { <2 x float>, <2 x float> }. +/// Also recognize aggregates like {<2 x float>, <2 x float>}, +/// {{float, float}, {float, float}}, [2 x {float, float}] and so on. +/// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples. +/// +/// Assume LastInsertInst is of InsertElementInst or InsertValueInst type. /// /// \return true if it matches. 
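The generalized `canMapToVector` below peels homogeneous struct/array/vector layers, multiplying the lane count, until a scalar element type remains. A hedged model of that walk, where pointer equality stands in for LLVM's uniqued `Type` objects:

```cpp
#include <vector>

struct Ty {
  bool IsAggregate = false;
  std::vector<const Ty *> Elems; // struct members, or repeated array element
};

// Returns the flattened scalar lane count, or 0 if not homogeneous.
unsigned flattenedLanes(const Ty *T) {
  unsigned N = 1;
  while (T->IsAggregate) {
    if (T->Elems.empty())
      return 0;
    for (const Ty *E : T->Elems)
      if (E != T->Elems.front()) // non-homogeneous aggregate
        return 0;
    N *= (unsigned)T->Elems.size();
    T = T->Elems.front();
  }
  return N;
}

// e.g. {{<2 x float>, <2 x float>}, {<2 x float>, <2 x float>}} flattens to
// four <2 x float> pieces, i.e. 8 float lanes once vectors are peeled too.
```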
-static bool findBuildAggregate(InsertValueInst *IV, TargetTransformInfo *TTI, +static bool findBuildAggregate(Value *LastInsertInst, TargetTransformInfo *TTI, SmallVectorImpl &BuildVectorOpds, int &UserCost) { + assert((isa(LastInsertInst) || + isa(LastInsertInst)) && + "Expected insertelement or insertvalue instruction!"); UserCost = 0; do { - if (auto *IE = dyn_cast(IV->getInsertedValueOperand())) { + Value *InsertedOperand; + if (auto *IE = dyn_cast(LastInsertInst)) { + InsertedOperand = IE->getOperand(1); + LastInsertInst = IE->getOperand(0); + if (auto *CI = dyn_cast(IE->getOperand(2))) { + UserCost += TTI->getVectorInstrCost(Instruction::InsertElement, + IE->getType(), CI->getZExtValue()); + } + } else { + auto *IV = cast(LastInsertInst); + InsertedOperand = IV->getInsertedValueOperand(); + LastInsertInst = IV->getAggregateOperand(); + } + if (isa(InsertedOperand) || + isa(InsertedOperand)) { int TmpUserCost; - SmallVector TmpBuildVectorOpds; - if (!findBuildVector(IE, TTI, TmpBuildVectorOpds, TmpUserCost)) + SmallVector TmpBuildVectorOpds; + if (!findBuildAggregate(InsertedOperand, TTI, TmpBuildVectorOpds, + TmpUserCost)) return false; - BuildVectorOpds.append(TmpBuildVectorOpds.rbegin(), TmpBuildVectorOpds.rend()); + BuildVectorOpds.append(TmpBuildVectorOpds.rbegin(), + TmpBuildVectorOpds.rend()); UserCost += TmpUserCost; } else { - BuildVectorOpds.push_back(IV->getInsertedValueOperand()); + BuildVectorOpds.push_back(InsertedOperand); } - Value *V = IV->getAggregateOperand(); - if (isa(V)) + if (isa(LastInsertInst)) break; - IV = dyn_cast(V); - if (!IV || !IV->hasOneUse()) + if ((!isa(LastInsertInst) && + !isa(LastInsertInst)) || + !LastInsertInst->hasOneUse()) return false; } while (true); std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); @@ -7177,7 +7171,7 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB, BoUpSLP &R) { int UserCost; SmallVector BuildVectorOpds; - if (!findBuildVector(IEI, TTI, BuildVectorOpds, UserCost) || + if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, UserCost) || (llvm::all_of(BuildVectorOpds, [](Value *V) { return isa(V); }) && isShuffle(BuildVectorOpds))) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 123477cd62096..b15c5d0f7dad1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -468,6 +468,11 @@ void VPlan::execute(VPTransformState *State) { updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD +void VPlan::dump() const { dbgs() << *this << '\n'; } +#endif + void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, BasicBlock *LoopLatchBB) { BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor(); @@ -527,8 +532,7 @@ void VPlanPrinter::dump() { if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) { OS << ", where:"; if (Plan.BackedgeTakenCount) - OS << "\\n" - << *Plan.getOrCreateBackedgeTakenCount() << " := BackedgeTakenCount"; + OS << "\\n" << *Plan.BackedgeTakenCount << " := BackedgeTakenCount"; for (auto Entry : Plan.Value2VPValue) { OS << "\\n" << *Entry.second; OS << DOT::EscapeString(" := "); @@ -540,7 +544,7 @@ void VPlanPrinter::dump() { OS << "edge [fontname=Courier, fontsize=30]\n"; OS << "compound=true\n"; - for (VPBlockBase *Block : depth_first(Plan.getEntry())) + for (const VPBlockBase *Block : depth_first(Plan.getEntry())) dumpBlock(Block); OS << "}\n"; diff 
--git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 226c6c0279d7e..6fabd5c39ba5d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1159,6 +1159,128 @@ class VPRegionBlock : public VPBlockBase { void execute(struct VPTransformState *State) override; }; +//===----------------------------------------------------------------------===// +// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs // +//===----------------------------------------------------------------------===// + +// The following set of template specializations implement GraphTraits to treat +// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note +// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the +// VPBlockBase is a VPRegionBlock, this specialization provides access to its +// successors/predecessors but not to the blocks inside the region. + +template <> struct GraphTraits { + using NodeRef = VPBlockBase *; + using ChildIteratorType = SmallVectorImpl::iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + + static inline ChildIteratorType child_begin(NodeRef N) { + return N->getSuccessors().begin(); + } + + static inline ChildIteratorType child_end(NodeRef N) { + return N->getSuccessors().end(); + } +}; + +template <> struct GraphTraits { + using NodeRef = const VPBlockBase *; + using ChildIteratorType = SmallVectorImpl::const_iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + + static inline ChildIteratorType child_begin(NodeRef N) { + return N->getSuccessors().begin(); + } + + static inline ChildIteratorType child_end(NodeRef N) { + return N->getSuccessors().end(); + } +}; + +// Inverse order specialization for VPBasicBlocks. Predecessors are used instead +// of successors for the inverse traversal. +template <> struct GraphTraits> { + using NodeRef = VPBlockBase *; + using ChildIteratorType = SmallVectorImpl::iterator; + + static NodeRef getEntryNode(Inverse B) { return B.Graph; } + + static inline ChildIteratorType child_begin(NodeRef N) { + return N->getPredecessors().begin(); + } + + static inline ChildIteratorType child_end(NodeRef N) { + return N->getPredecessors().end(); + } +}; + +// The following set of template specializations implement GraphTraits to +// treat VPRegionBlock as a graph and recurse inside its nodes. It's important +// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases +// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so +// there won't be automatic recursion into other VPBlockBases that turn to be +// VPRegionBlocks. + +template <> +struct GraphTraits : public GraphTraits { + using GraphRef = VPRegionBlock *; + using nodes_iterator = df_iterator; + + static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getEntry()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. 
+ return nodes_iterator::end(N); + } +}; + +template <> +struct GraphTraits + : public GraphTraits { + using GraphRef = const VPRegionBlock *; + using nodes_iterator = df_iterator; + + static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getEntry()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. + return nodes_iterator::end(N); + } +}; + +template <> +struct GraphTraits> + : public GraphTraits> { + using GraphRef = VPRegionBlock *; + using nodes_iterator = df_iterator; + + static NodeRef getEntryNode(Inverse N) { + return N.Graph->getExit(); + } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getExit()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. + return nodes_iterator::end(N); + } +}; + /// VPlan models a candidate for vectorization, encoding various decisions take /// to produce efficient output IR, including which branches, basic-blocks and /// output IR instructions to generate, and their cost. VPlan holds a @@ -1265,6 +1387,9 @@ class VPlan { VPLoopInfo &getVPLoopInfo() { return VPLInfo; } const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; } + /// Dump the plan to stderr (for debugging). + void dump() const; + private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. @@ -1276,20 +1401,20 @@ class VPlan { /// VPlanPrinter prints a given VPlan to a given output stream. The printing is /// indented and follows the dot format. class VPlanPrinter { - friend inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan); + friend inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan); friend inline raw_ostream &operator<<(raw_ostream &OS, const struct VPlanIngredient &I); private: raw_ostream &OS; - VPlan &Plan; + const VPlan &Plan; unsigned Depth = 0; unsigned TabWidth = 2; std::string Indent; unsigned BID = 0; SmallDenseMap BlockID; - VPlanPrinter(raw_ostream &O, VPlan &P) : OS(O), Plan(P) {} + VPlanPrinter(raw_ostream &O, const VPlan &P) : OS(O), Plan(P) {} /// Handle indentation. void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); } @@ -1336,134 +1461,12 @@ inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) { return OS; } -inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) { +inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) { VPlanPrinter Printer(OS, Plan); Printer.dump(); return OS; } -//===----------------------------------------------------------------------===// -// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs // -//===----------------------------------------------------------------------===// - -// The following set of template specializations implement GraphTraits to treat -// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note -// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the -// VPBlockBase is a VPRegionBlock, this specialization provides access to its -// successors/predecessors but not to the blocks inside the region. 
- -template <> struct GraphTraits { - using NodeRef = VPBlockBase *; - using ChildIteratorType = SmallVectorImpl::iterator; - - static NodeRef getEntryNode(NodeRef N) { return N; } - - static inline ChildIteratorType child_begin(NodeRef N) { - return N->getSuccessors().begin(); - } - - static inline ChildIteratorType child_end(NodeRef N) { - return N->getSuccessors().end(); - } -}; - -template <> struct GraphTraits { - using NodeRef = const VPBlockBase *; - using ChildIteratorType = SmallVectorImpl::const_iterator; - - static NodeRef getEntryNode(NodeRef N) { return N; } - - static inline ChildIteratorType child_begin(NodeRef N) { - return N->getSuccessors().begin(); - } - - static inline ChildIteratorType child_end(NodeRef N) { - return N->getSuccessors().end(); - } -}; - -// Inverse order specialization for VPBasicBlocks. Predecessors are used instead -// of successors for the inverse traversal. -template <> struct GraphTraits> { - using NodeRef = VPBlockBase *; - using ChildIteratorType = SmallVectorImpl::iterator; - - static NodeRef getEntryNode(Inverse B) { return B.Graph; } - - static inline ChildIteratorType child_begin(NodeRef N) { - return N->getPredecessors().begin(); - } - - static inline ChildIteratorType child_end(NodeRef N) { - return N->getPredecessors().end(); - } -}; - -// The following set of template specializations implement GraphTraits to -// treat VPRegionBlock as a graph and recurse inside its nodes. It's important -// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases -// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so -// there won't be automatic recursion into other VPBlockBases that turn to be -// VPRegionBlocks. - -template <> -struct GraphTraits : public GraphTraits { - using GraphRef = VPRegionBlock *; - using nodes_iterator = df_iterator; - - static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } - - static nodes_iterator nodes_begin(GraphRef N) { - return nodes_iterator::begin(N->getEntry()); - } - - static nodes_iterator nodes_end(GraphRef N) { - // df_iterator::end() returns an empty iterator so the node used doesn't - // matter. - return nodes_iterator::end(N); - } -}; - -template <> -struct GraphTraits - : public GraphTraits { - using GraphRef = const VPRegionBlock *; - using nodes_iterator = df_iterator; - - static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } - - static nodes_iterator nodes_begin(GraphRef N) { - return nodes_iterator::begin(N->getEntry()); - } - - static nodes_iterator nodes_end(GraphRef N) { - // df_iterator::end() returns an empty iterator so the node used doesn't - // matter. - return nodes_iterator::end(N); - } -}; - -template <> -struct GraphTraits> - : public GraphTraits> { - using GraphRef = VPRegionBlock *; - using nodes_iterator = df_iterator; - - static NodeRef getEntryNode(Inverse N) { - return N.Graph->getExit(); - } - - static nodes_iterator nodes_begin(GraphRef N) { - return nodes_iterator::begin(N->getExit()); - } - - static nodes_iterator nodes_end(GraphRef N) { - // df_iterator::end() returns an empty iterator so the node used doesn't - // matter. 
- return nodes_iterator::end(N); - } -}; - //===----------------------------------------------------------------------===// // VPlan Utilities //===----------------------------------------------------------------------===// diff --git a/llvm/lib/WindowsManifest/CMakeLists.txt b/llvm/lib/WindowsManifest/CMakeLists.txt index 4f2d011d54348..fe6ddcd414d56 100644 --- a/llvm/lib/WindowsManifest/CMakeLists.txt +++ b/llvm/lib/WindowsManifest/CMakeLists.txt @@ -1,18 +1,12 @@ -set(system_libs) -if( CMAKE_HOST_UNIX ) - if( LLVM_LIBXML2_ENABLED ) - set(system_libs ${system_libs} ${LIBXML2_LIBS}) - endif() -endif() - add_llvm_component_library(LLVMWindowsManifest WindowsManifestMerger.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/WindowsManifest - ${Backtrace_INCLUDE_DIRS} + ${Backtrace_INCLUDE_DIRS}) - LINK_LIBS ${system_libs} - ) - -set_property(TARGET LLVMWindowsManifest PROPERTY LLVM_SYSTEM_LIBS "${system_libs}") +if(LIBXML2_LIBRARIES) + target_link_libraries(LLVMWindowsManifest PUBLIC ${LIBXML2_LIBRARIES}) + set_property(TARGET LLVMWindowsManifest PROPERTY + LLVM_SYSTEM_LIBS ${LIBXML2_LIBRARIES}) +endif() diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll index 64e0a82456f11..8212cc4769045 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll @@ -141,6 +141,24 @@ exit: ret i32 %result } +define i32 @test_cold_loop(i32 %a, i32 %b) { +entry: + %cond1 = icmp eq i32 %a, 42 + br i1 %cond1, label %header, label %exit + +header: + br label %body + +body: + %cond2 = icmp eq i32 %b, 42 + br i1 %cond2, label %header, label %exit +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% + +exit: + call void @coldfunc() + ret i32 %b +} + declare i32 @regular_function(i32 %i) define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) { diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll index 0566ca16c2f3a..6e01afd2cfc82 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll @@ -79,6 +79,32 @@ exit: ret i32 %b } +define i32 @test4(i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'test4' +; Make sure we handle loops post-dominated by unreachables. 
+entry: + %cond1 = icmp eq i32 %a, 42 + br i1 %cond1, label %header, label %exit +; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] + +header: + br label %body + +body: + %cond2 = icmp eq i32 %a, 42 + br i1 %cond2, label %header, label %abort +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00% + +abort: + call void @abort() noreturn + unreachable + +exit: + ret i32 %b +} + @_ZTIi = external global i8* ; CHECK-LABEL: throwSmallException diff --git a/llvm/test/Analysis/ConstantFolding/copysign.ll b/llvm/test/Analysis/ConstantFolding/copysign.ll new file mode 100644 index 0000000000000..228ffcb470538 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/copysign.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -constprop < %s | FileCheck %s + +declare float @llvm.copysign.f32(float, float) +declare double @llvm.copysign.f64(double, double) + +define float @f32_01() { +; CHECK-LABEL: @f32_01( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.copysign.f32(float 1.0, float -2.0) + ret float %x +} + +define float @f32_02() { +; CHECK-LABEL: @f32_02( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @llvm.copysign.f32(float -2.0, float 1.0) + ret float %x +} + +define float @f32_03() { +; CHECK-LABEL: @f32_03( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.copysign.f32(float -2.0, float -1.0) + ret float %x +} + +define double @f64_01() { +; CHECK-LABEL: @f64_01( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.copysign.f64(double 1.0, double -2.0) + ret double %x +} + +define double @f64_02() { +; CHECK-LABEL: @f64_02( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @llvm.copysign.f64(double -1.0, double 2.0) + ret double %x +} + +define double @f64_03() { +; CHECK-LABEL: @f64_03( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.copysign.f64(double -1.0, double -2.0) + ret double %x +} diff --git a/llvm/test/Analysis/CostModel/PowerPC/future-cost-model.ll b/llvm/test/Analysis/CostModel/PowerPC/future-cost-model.ll new file mode 100644 index 0000000000000..3e4fb82e600c6 --- /dev/null +++ b/llvm/test/Analysis/CostModel/PowerPC/future-cost-model.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future | FileCheck %s --check-prefix=FUTURE +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 | FileCheck %s --check-prefix=PWR9 + +define void @test(i16 %p1, i16 %p2, <4 x i16> %p3, <4 x i16> %p4) { + %i1 = add i16 %p1, %p2 + %v1 = add <4 x i16> %p3, %p4 + ret void + ; FUTURE: cost of 1 {{.*}} add + ; FUTURE: cost of 1 {{.*}} add + + ; PWR9: cost of 1 {{.*}} add + ; PWR9: cost of 2 {{.*}} add +} + diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll index 7583d6e60c809..bb03b56e48f60 100644 --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -6,7 +6,7 @@ ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ ; -; 
RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 @@ -39,6 +39,13 @@ define i32 @fptosi_double_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_double_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_double_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> @@ -75,6 +82,13 @@ define i32 @fptosi_double_i32(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_double_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_double_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> @@ -111,6 +125,13 @@ define i32 @fptosi_double_i16(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_double_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_double_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> @@ -147,6 +168,13 @@ define i32 @fptosi_double_i8(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_double_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_double_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> @@ -194,6 +222,14 @@ define i32 @fptosi_float_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_float_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> @@ -218,6 +254,13 @@ define i32 @fptosi_float_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_float_i32' ; BTVER2-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> @@ -254,6 +297,13 @@ define i32 @fptosi_float_i16(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_float_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> @@ -290,6 +340,13 @@ define i32 @fptosi_float_i8(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_float_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll index 078b21ba72033..cdb3e5486604f 100644 --- a/llvm/test/Analysis/CostModel/X86/fptoui.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -6,7 +6,7 @@ ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ ; -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 @@ -39,6 +39,13 @@ define i32 @fptoui_double_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_double_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_double_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> @@ -75,6 +82,13 @@ define i32 @fptoui_double_i32(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_double_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_double_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> @@ -111,6 +125,13 @@ define i32 @fptoui_double_i16(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_double_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_double_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> @@ -147,6 +168,13 @@ define i32 @fptoui_double_i8(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; 
SLM-LABEL: 'fptoui_double_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_double_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> @@ -194,6 +222,14 @@ define i32 @fptoui_float_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> @@ -232,6 +268,13 @@ define i32 @fptoui_float_i32(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> @@ -268,6 +311,13 @@ define i32 @fptoui_float_i16(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i16' +; SLM-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %I16 = fptoui float undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> @@ -304,6 +354,13 @@ define i32 @fptoui_float_i8(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll index 3ceba32744b6b..4ed509ff9db09 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll @@ -8,8 +8,8 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,SLM +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,GLM ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 ; @@ -270,64 +270,123 @@ define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512 } define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { -; SSE-LABEL: 'test_vXi16' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 
x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 
x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x 
i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, 
<2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x 
i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXi16' ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> @@ -506,6 +565,124 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; +; SLM-LABEL: 'test_vXi16' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; GLM-LABEL: 'test_vXi16' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = 
shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 
x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ; BTVER2-LABEL: 'test_vXi16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> @@ -863,125 +1040,6 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; SSE42-LABEL: 'test_vXi8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x 
i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; ; AVX-LABEL: 'test_vXi8' ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> @@ -1339,6 +1397,244 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; +; SLM-LABEL: 'test_vXi8' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; 
SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; 
SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 
x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; GLM-LABEL: 'test_vXi8' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; 
GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = 
shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ; BTVER2-LABEL: 'test_vXi8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/vector-extract.ll b/llvm/test/Analysis/CostModel/X86/vector-extract.ll index 62123c422a8f5..ddb3654fbc6ab 100644 --- a/llvm/test/Analysis/CostModel/X86/vector-extract.ll +++ b/llvm/test/Analysis/CostModel/X86/vector-extract.ll @@ -9,8 +9,8 @@ ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,SLM +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,GLM ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 define i32 @extract_double(i32 %arg) { @@ -188,19 +188,117 @@ define i32 @extract_float(i32 
%arg) { } define i32 @extract_i64(i32 %arg) { -; CHECK-LABEL: 'extract_i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'extract_i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i64' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i64' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'extract_i64' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i64_1 
= extractelement <2 x i64> undef, i32 1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i64' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg @@ -234,24 +332,157 @@ define i32 @extract_i64(i32 %arg) { } define i32 @extract_i32(i32 %arg) { -; CHECK-LABEL: 'extract_i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> 
undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'extract_i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i32' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> 
undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x 
i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'extract_i32' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 
x i32> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32_1 = 
extractelement <2 x i32> undef, i32 1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i32' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, 
i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg @@ -296,24 +527,157 @@ define i32 @extract_i32(i32 %arg) { } define i32 @extract_i16(i32 %arg) { -; CHECK-LABEL: 'extract_i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'extract_i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i16' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'extract_i16' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement 
<32 x i16> undef, i32 16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v32i16_a = extractelement <32 x i16> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i16' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg @@ -357,29 +721,197 @@ define i32 @extract_i16(i32 %arg) { } define i32 @extract_i8(i32 %arg) { -; CHECK-LABEL: 'extract_i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'extract_i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSE2-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'extract_i8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSE41-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; 
SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i8' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; GLM-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg diff --git a/llvm/test/Analysis/DDG/basic-a.ll b/llvm/test/Analysis/DDG/basic-a.ll index 920e71f6717be..a52e8c258f501 100644 --- a/llvm/test/Analysis/DDG/basic-a.ll +++ b/llvm/test/Analysis/DDG/basic-a.ll @@ -1,7 +1,44 @@ ; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK-LABEL: 'DDG' for loop 'test1.for.body': -; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction + +; CHECK: Node Address:[[PI:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT: --- start of nodes in pi-block --- +; CHECK-NEXT: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %inc = add i64 %i.02, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N11:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N11]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test1.for.body ], [ 0, %test1.for.body.preheader ] +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N10]] +; CHECK-NEXT: --- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N1:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N7]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %exitcond = icmp ne i64 %inc, %n +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N8]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %exitcond, label %test1.for.body, label %for.end.loopexit +; CHECK-NEXT: Edges:none! 
+ +; CHECK: Node Address:[[N6]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N1]]:single-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 ; CHECK-NEXT: Edges: @@ -23,12 +60,6 @@ ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %add = fadd float %0, %conv ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]] - -; CHECK: Node Address:[[N6:0x[0-9a-f]*]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 -; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N5]] ; CHECK: Node Address:[[N5]]:single-instruction @@ -36,36 +67,6 @@ ; CHECK-NEXT: store float %add, float* %arrayidx1, align 4 ; CHECK-NEXT: Edges:none! -; CHECK: Node Address:[[N7:0x[0-9a-f]*]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %exitcond = icmp ne i64 %inc, %n -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] - -; CHECK: Node Address:[[N8]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: br i1 %exitcond, label %test1.for.body, label %for.end.loopexit -; CHECK-NEXT: Edges:none! - -; CHECK: Node Address:[[N9:0x[0-9a-f]*]]:pi-block -; CHECK-NEXT: --- start of nodes in pi-block --- -; CHECK-NEXT: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %inc = add i64 %i.02, 1 -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N11:0x[0-9a-f]*]] - -; CHECK: Node Address:[[N11]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test1.for.body ], [ 0, %test1.for.body.preheader ] -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N10]] -; CHECK-NEXT: --- end of nodes in pi-block --- -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N1]] -; CHECK-NEXT: [def-use] to [[N6]] -; CHECK-NEXT: [def-use] to [[N7]] - ;; No memory dependencies. 
;; void test1(unsigned long n, float * restrict a, float * restrict b) { @@ -96,78 +97,80 @@ for.end: ; preds = %test1.for.body, %en ; CHECK-LABEL: 'DDG' for loop 'test2.for.body': -; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] -; CHECK: Node Address:[[N2]]:single-instruction +; CHECK: Node Address:[[PI:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT: --- start of nodes in pi-block --- +; CHECK: Node Address:[[N11:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 +; CHECK-NEXT: %inc = add i64 %i.02, 1 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N12:0x[0-9a-f]*]] -; CHECK: Node Address:[[N4:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N12]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test2.for.body ], [ 0, %test2.for.body.preheader ] ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N11]] +; CHECK-NEXT: --- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N1:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] -; CHECK: Node Address:[[N5]]:single-instruction +; CHECK: Node Address:[[N8]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %1 = load float, float* %arrayidx1, align 4 +; CHECK-NEXT: %exitcond = icmp ne i64 %inc, %n ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N3]] -; CHECK-NEXT: [memory] to [[N6:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] -; CHECK: Node Address:[[N3]]:single-instruction +; CHECK: Node Address:[[N9]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %add = fadd float %0, %1 -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N6]] +; CHECK-NEXT: br i1 %exitcond, label %test2.for.body, label %for.end.loopexit +; CHECK-NEXT: Edges:none! -; CHECK: Node Address:[[N7:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N7]]:single-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %i.02 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N6]] +; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] -; CHECK: Node Address:[[N6]]:single-instruction +; CHECK: Node Address:[[N4]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: store float %add, float* %arrayidx2, align 4 -; CHECK-NEXT: Edges:none! 
+; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]] -; CHECK: Node Address:[[N8:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N5]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %exitcond = icmp ne i64 %inc, %n +; CHECK-NEXT: %1 = load float, float* %arrayidx1, align 4 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] +; CHECK-NEXT: [memory] to [[N6]] -; CHECK: Node Address:[[N9]]:single-instruction +; CHECK: Node Address:[[N1]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: br i1 %exitcond, label %test2.for.body, label %for.end.loopexit -; CHECK-NEXT: Edges:none! +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] -; CHECK: Node Address:[[N10:0x[0-9a-f]*]]:pi-block -; CHECK-NEXT: --- start of nodes in pi-block --- -; CHECK: Node Address:[[N11:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N2]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %inc = add i64 %i.02, 1 +; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N12:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N3]] -; CHECK: Node Address:[[N12]]:single-instruction +; CHECK: Node Address:[[N3]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test2.for.body ], [ 0, %test2.for.body.preheader ] -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N11]] -; CHECK-NEXT: --- end of nodes in pi-block --- +; CHECK-NEXT: %add = fadd float %0, %1 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N1]] -; CHECK-NEXT: [def-use] to [[N4]] -; CHECK-NEXT: [def-use] to [[N7]] -; CHECK-NEXT: [def-use] to [[N8]] +; CHECK-NEXT: [def-use] to [[N6]] + +; CHECK: Node Address:[[N6]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: store float %add, float* %arrayidx2, align 4 +; CHECK-NEXT: Edges:none! + ;; Loop-independent memory dependencies. 
diff --git a/llvm/test/Analysis/DDG/basic-b.ll b/llvm/test/Analysis/DDG/basic-b.ll index f83f7fe92f3b3..757c706193a5b 100644 --- a/llvm/test/Analysis/DDG/basic-b.ll +++ b/llvm/test/Analysis/DDG/basic-b.ll @@ -1,19 +1,45 @@ ; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK-LABEL: 'DDG' for loop 'test1.for.body': -; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction + +; CHECK: Node Address:[[N9:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N13:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 +; CHECK-NEXT: %inc = add i64 %i.02, 1 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N14:0x[0-9a-f]*]] -; CHECK: Node Address:[[N2]]:single-instruction +; CHECK: Node Address:[[N14]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 +; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test1.for.body ], [ 1, %test1.for.body.preheader ] +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N13]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N1:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N7]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %cmp = icmp ult i64 %inc, %sub +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N8]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %cmp, label %test1.for.body, label %for.end.loopexit +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N6]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] -; CHECK: Node Address:[[N4:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N4]]:single-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %sub1 = add i64 %i.02, -1 ; CHECK-NEXT: Edges: @@ -25,22 +51,17 @@ ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N3]] -; CHECK: Node Address:[[N6:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N1]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N3]] +; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] -; CHECK: Node Address:[[N7:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N2]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %cmp = icmp ult i64 %inc, %sub +; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] - -; CHECK: Node Address:[[N8]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: br i1 %cmp, label %test1.for.body, label %for.end.loopexit -; CHECK-NEXT: Edges:none! +; CHECK-NEXT: [def-use] to [[N3]] ; CHECK: Node Address:[[N3]]:pi-block ; CHECK-NEXT: --- start of nodes in pi-block --- @@ -64,25 +85,6 @@ ; CHECK-NEXT:--- end of nodes in pi-block --- ; CHECK-NEXT: Edges:none! 
-; CHECK: Node Address:[[N9:0x[0-9a-f]*]]:pi-block -; CHECK-NEXT:--- start of nodes in pi-block --- -; CHECK: Node Address:[[N13:0x[0-9a-f]*]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %inc = add i64 %i.02, 1 -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N14:0x[0-9a-f]*]] - -; CHECK: Node Address:[[N14]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test1.for.body ], [ 1, %test1.for.body.preheader ] -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N13]] -; CHECK-NEXT:--- end of nodes in pi-block --- -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N1]] -; CHECK-NEXT: [def-use] to [[N4]] -; CHECK-NEXT: [def-use] to [[N6]] -; CHECK-NEXT: [def-use] to [[N7]] ;; Loop-carried dependence requiring edge-reversal to expose a cycle @@ -117,19 +119,45 @@ for.end: ; preds = %test1.for.body, %en } ; CHECK-LABEL: 'DDG' for loop 'test2.for.body': -; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction + +; CHECK: Node Address:[[N11:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N12:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 +; CHECK-NEXT: %inc = add i64 %i.02, 1 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N13:0x[0-9a-f]*]] -; CHECK: Node Address:[[N2]]:single-instruction +; CHECK: Node Address:[[N13]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 +; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test2.for.body ], [ 1, %test2.for.body.preheader ] ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N12]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N1:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N9]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %cmp = icmp ult i64 %inc, %sub +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N10:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N10]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %cmp, label %test2.for.body, label %for.end.loopexit +; CHECK-NEXT: Edges:none! 
+ +; CHECK: Node Address:[[N8]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] -; CHECK: Node Address:[[N4:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N4]]:single-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %add1 = add i64 %i.02, 1 ; CHECK-NEXT: Edges: @@ -145,57 +173,33 @@ for.end: ; preds = %test1.for.body, %en ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %1 = load float, float* %arrayidx2, align 4 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N3]] -; CHECK-NEXT: [memory] to [[N7:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] +; CHECK-NEXT: [memory] to [[N7]] -; CHECK: Node Address:[[N3]]:single-instruction +; CHECK: Node Address:[[N1]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %add = fadd float %0, %1 +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N7]] +; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] -; CHECK: Node Address:[[N8:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N2]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N7]] +; CHECK-NEXT: [def-use] to [[N3]] -; CHECK: Node Address:[[N7]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: store float %add, float* %arrayidx3, align 4 -; CHECK-NEXT: Edges:none! -; CHECK: Node Address:[[N9:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N3]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %cmp = icmp ult i64 %inc, %sub +; CHECK-NEXT: %add = fadd float %0, %1 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N10:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N7]] -; CHECK: Node Address:[[N10]]:single-instruction +; CHECK: Node Address:[[N7]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: br i1 %cmp, label %test2.for.body, label %for.end.loopexit +; CHECK-NEXT: store float %add, float* %arrayidx3, align 4 ; CHECK-NEXT: Edges:none! -; CHECK: Node Address:[[N11:0x[0-9a-f]*]]:pi-block -; CHECK-NEXT:--- start of nodes in pi-block --- -; CHECK: Node Address:[[N12:0x[0-9a-f]*]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %inc = add i64 %i.02, 1 -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N13:0x[0-9a-f]*]] - -; CHECK: Node Address:[[N13]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %i.02 = phi i64 [ %inc, %test2.for.body ], [ 1, %test2.for.body.preheader ] -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N12]] -; CHECK-NEXT:--- end of nodes in pi-block --- -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N1]] -; CHECK-NEXT: [def-use] to [[N4]] -; CHECK-NEXT: [def-use] to [[N8]] -; CHECK-NEXT: [def-use] to [[N9]] - ;; Forward loop-carried dependence *not* causing a cycle. 
;; void test2(unsigned long n, float * restrict a, float * restrict b) { diff --git a/llvm/test/Analysis/DDG/basic-loopnest.ll b/llvm/test/Analysis/DDG/basic-loopnest.ll index aded488ef2365..41c2cbbdc7a15 100644 --- a/llvm/test/Analysis/DDG/basic-loopnest.ll +++ b/llvm/test/Analysis/DDG/basic-loopnest.ll @@ -2,73 +2,65 @@ ; CHECK-LABEL: 'DDG' for loop 'test1.for.cond1.preheader': -; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %sub = add i64 %n, -1 -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] -; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] -; CHECK: Node Address:[[N3]]:single-instruction +; CHECK: Node Address:[[N28:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N29:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %cmp21 = icmp ult i64 1, %sub +; CHECK-NEXT: %inc = add i64 %j.02, 1 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] - -; CHECK: Node Address:[[N4]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: br i1 %cmp21, label %for.body4.preheader, label %for.inc12 -; CHECK-NEXT: Edges:none! +; CHECK-NEXT: [def-use] to [[N30:0x[0-9a-f]*]] -; CHECK: Node Address:[[N5:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N30]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %0 = mul nsw i64 %i.04, %n +; CHECK-NEXT: %j.02 = phi i64 [ %inc, %for.body4 ], [ 1, %for.body4.preheader ] ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] - -; CHECK: Node Address:[[N6]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %0 +; CHECK-NEXT: [def-use] to [[N29]] +; CHECK-NEXT:--- end of nodes in pi-block --- ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N13:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N16:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] -; CHECK: Node Address:[[N7]]:single-instruction +; CHECK: Node Address:[[N13]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 +; CHECK-NEXT: %sub7 = add i64 %j.02, -1 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N12:0x[0-9a-f]*]] -; CHECK: Node Address:[[N8]]:single-instruction +; CHECK: Node Address:[[N25:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N26:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %1 = load float, float* %arrayidx5, align 4 +; CHECK-NEXT: %inc13 = add i64 %i.04, 1 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N27:0x[0-9a-f]*]] -; CHECK: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N27]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %2 = mul nsw i64 %i.04, %n +; CHECK-NEXT: %i.04 = phi i64 [ %inc13, %for.inc12 ], [ 0, %test1.for.cond1.preheader.preheader ] ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N11:0x[0-9a-f]*]] - -; CHECK: Node Address:[[N11]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx6 = getelementptr inbounds float, float* %a, i64 %2 +; CHECK-NEXT: [def-use] to [[N26]] +; CHECK-NEXT:--- end of nodes in pi-block --- ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N12:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to 
[[N5:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N10:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N14:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N18:0x[0-9a-f]*]] -; CHECK: Node Address:[[N13:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N18]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %sub7 = add i64 %j.02, -1 +; CHECK-NEXT: %exitcond = icmp ne i64 %inc13, %n ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N12]] +; CHECK-NEXT: [def-use] to [[N19:0x[0-9a-f]*]] -; CHECK: Node Address:[[N12]]:single-instruction +; CHECK: Node Address:[[N19]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx8 = getelementptr inbounds float, float* %arrayidx6, i64 %sub7 -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N9]] +; CHECK-NEXT: br i1 %exitcond, label %test1.for.cond1.preheader, label %for.end14.loopexit +; CHECK-NEXT: Edges:none! -; CHECK: Node Address:[[N14:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N14]]:single-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %4 = mul nsw i64 %i.04, %n ; CHECK-NEXT: Edges: @@ -78,45 +70,55 @@ ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %arrayidx10 = getelementptr inbounds float, float* %a, i64 %4 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N16:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N16]] ; CHECK: Node Address:[[N16]]:single-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %arrayidx11 = getelementptr inbounds float, float* %arrayidx10, i64 %j.02 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N9]] +; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] -; CHECK: Node Address:[[N2]]:single-instruction +; CHECK: Node Address:[[N10]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %cmp2 = icmp ult i64 %inc, %sub +; CHECK-NEXT: %2 = mul nsw i64 %i.04, %n ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N17:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N11:0x[0-9a-f]*]] -; CHECK: Node Address:[[N17]]:single-instruction +; CHECK: Node Address:[[N11]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: br i1 %cmp2, label %for.body4, label %for.inc12.loopexit -; CHECK-NEXT: Edges:none! +; CHECK-NEXT: %arrayidx6 = getelementptr inbounds float, float* %a, i64 %2 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N12]] -; CHECK: Node Address:[[N18:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N12]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %exitcond = icmp ne i64 %inc13, %n +; CHECK-NEXT: %arrayidx8 = getelementptr inbounds float, float* %arrayidx6, i64 %sub7 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N19:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N9]] -; CHECK: Node Address:[[N19]]:single-instruction +; CHECK: Node Address:[[N5]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: br i1 %exitcond, label %test1.for.cond1.preheader, label %for.end14.loopexit -; CHECK-NEXT: Edges:none! +; CHECK-NEXT: %0 = mul nsw i64 %i.04, %n +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] -; CHECK: Node Address:[[N20:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N6]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: br label %for.body4 -; CHECK-NEXT: Edges:none! +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %0 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N7]] -; CHECK: Node Address:[[N21:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N7]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: br label %for.inc12 -; CHECK-NEXT: Edges:none! 
+; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N8]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %1 = load float, float* %arrayidx5, align 4 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N9]] ; CHECK: Node Address:[[N9]]:pi-block ; CHECK-NEXT:--- start of nodes in pi-block --- @@ -140,46 +142,44 @@ ; CHECK-NEXT:--- end of nodes in pi-block --- ; CHECK-NEXT: Edges:none! -; CHECK: Node Address:[[N25:0x[0-9a-f]*]]:pi-block -; CHECK-NEXT:--- start of nodes in pi-block --- -; CHECK: Node Address:[[N26:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N21:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %inc13 = add i64 %i.04, 1 -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N27:0x[0-9a-f]*]] +; CHECK-NEXT: br label %for.inc12 +; CHECK-NEXT: Edges:none! -; CHECK: Node Address:[[N27]]:single-instruction +; CHECK: Node Address:[[N20:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %i.04 = phi i64 [ %inc13, %for.inc12 ], [ 0, %test1.for.cond1.preheader.preheader ] -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N26]] -; CHECK-NEXT:--- end of nodes in pi-block --- -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N5]] -; CHECK-NEXT: [def-use] to [[N10]] -; CHECK-NEXT: [def-use] to [[N14]] -; CHECK-NEXT: [def-use] to [[N18]] +; CHECK-NEXT: br label %for.body4 +; CHECK-NEXT: Edges:none! -; CHECK: Node Address:[[N28:0x[0-9a-f]*]]:pi-block -; CHECK-NEXT:--- start of nodes in pi-block --- -; CHECK: Node Address:[[N29:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %inc = add i64 %j.02, 1 +; CHECK-NEXT: %sub = add i64 %n, -1 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N30:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N2]] +; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] -; CHECK: Node Address:[[N30]]:single-instruction +; CHECK: Node Address:[[N3]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %j.02 = phi i64 [ %inc, %for.body4 ], [ 1, %for.body4.preheader ] +; CHECK-NEXT: %cmp21 = icmp ult i64 1, %sub ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N29]] -; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N4]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %cmp21, label %for.body4.preheader, label %for.inc12 +; CHECK-NEXT: Edges:none! + +; CHECK: Node Address:[[N2]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %cmp2 = icmp ult i64 %inc, %sub ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N7]] -; CHECK-NEXT: [def-use] to [[N13]] -; CHECK-NEXT: [def-use] to [[N16]] -; CHECK-NEXT: [def-use] to [[N2]] +; CHECK-NEXT: [def-use] to [[N17:0x[0-9a-f]*]] +; CHECK: Node Address:[[N17]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: br i1 %cmp2, label %for.body4, label %for.inc12.loopexit +; CHECK-NEXT: Edges:none! ;; This test has a cycle. 
@@ -232,49 +232,83 @@ for.end14: ; preds = %for.inc12, %entry ; CHECK-LABEL: 'DDG' for loop 'test2.for.cond1.preheader': -; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction + +; CHECK: Node Address:[[PI1:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N28:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %sub = add i64 %n, -1 +; CHECK-NEXT: %inc = add i64 %j.02, 1 ; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N29:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N29]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %j.02 = phi i64 [ %inc, %for.body4 ], [ 1, %for.body4.preheader ] +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N28]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N13:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N18:0x[0-9a-f]*]] ; CHECK-NEXT: [def-use] to [[N2:0x[0-9a-f]*]] -; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] -; CHECK: Node Address:[[N3]]:single-instruction +; CHECK: Node Address:[[N13]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %cmp21 = icmp ult i64 1, %sub +; CHECK-NEXT: %add7 = add i64 %j.02, 1 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N12:0x[0-9a-f]*]] -; CHECK: Node Address:[[N4]]:single-instruction +; CHECK: Node Address:[[N24:0x[0-9a-f]*]]:pi-block +; CHECK-NEXT:--- start of nodes in pi-block --- +; CHECK: Node Address:[[N25:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: br i1 %cmp21, label %for.body4.preheader, label %for.inc12 -; CHECK-NEXT: Edges:none! +; CHECK-NEXT: %inc13 = add i64 %i.04, 1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N26:0x[0-9a-f]*]] -; CHECK: Node Address:[[N5:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N26]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %0 = mul nsw i64 %i.04, %n +; CHECK-NEXT: %i.04 = phi i64 [ %inc13, %for.inc12 ], [ 0, %test2.for.cond1.preheader.preheader ] ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N25]] +; CHECK-NEXT:--- end of nodes in pi-block --- +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N5:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N10:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N16:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N20:0x[0-9a-f]*]] -; CHECK: Node Address:[[N6]]:single-instruction +; CHECK: Node Address:[[N20]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %0 +; CHECK-NEXT: %exitcond = icmp ne i64 %inc13, %n ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N21:0x[0-9a-f]*]] -; CHECK: Node Address:[[N7]]:single-instruction +; CHECK: Node Address:[[N21]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 +; CHECK-NEXT: br i1 %exitcond, label %test2.for.cond1.preheader, label %for.end14.loopexit +; CHECK-NEXT: Edges:none! 
+ +; CHECK: Node Address:[[N16]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %4 = mul nsw i64 %i.04, %n ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N17:0x[0-9a-f]*]] -; CHECK: Node Address:[[N8]]:single-instruction +; CHECK: Node Address:[[N17]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %1 = load float, float* %arrayidx5, align 4 +; CHECK-NEXT: %arrayidx10 = getelementptr inbounds float, float* %a, i64 %4 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N18]] -; CHECK: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N18]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %arrayidx11 = getelementptr inbounds float, float* %arrayidx10, i64 %j.02 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N15:0x[0-9a-f]*]] + +; CHECK: Node Address:[[N10]]:single-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %2 = mul nsw i64 %i.04, %n ; CHECK-NEXT: Edges: @@ -284,12 +318,6 @@ for.end14: ; preds = %for.inc12, %entry ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %arrayidx6 = getelementptr inbounds float, float* %a, i64 %2 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N12:0x[0-9a-f]*]] - -; CHECK: Node Address:[[N13:0x[0-9a-f]*]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %add7 = add i64 %j.02, 1 -; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N12]] ; CHECK: Node Address:[[N12]]:single-instruction @@ -302,58 +330,47 @@ for.end14: ; preds = %for.inc12, %entry ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %3 = load float, float* %arrayidx8, align 4 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N9]] -; CHECK-NEXT: [memory] to [[N15:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] +; CHECK-NEXT: [memory] to [[N15]] -; CHECK: Node Address:[[N9]]:single-instruction +; CHECK: Node Address:[[N5]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %add = fadd float %1, %3 +; CHECK-NEXT: %0 = mul nsw i64 %i.04, %n ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N15]] +; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] -; CHECK: Node Address:[[N16:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N6]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %4 = mul nsw i64 %i.04, %n +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %0 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N17:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N7]] -; CHECK: Node Address:[[N17]]:single-instruction +; CHECK: Node Address:[[N7]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx10 = getelementptr inbounds float, float* %a, i64 %4 +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N18:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] -; CHECK: Node Address:[[N18]]:single-instruction +; CHECK: Node Address:[[N8]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx11 = getelementptr inbounds float, float* %arrayidx10, i64 %j.02 +; CHECK-NEXT: %1 = load float, float* %arrayidx5, align 4 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N15]] - -; CHECK: Node Address:[[N15]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: store float %add, float* %arrayidx11, align 4 -; CHECK-NEXT: Edges:none! 
+; CHECK-NEXT: [def-use] to [[N9]] -; CHECK: Node Address:[[N2]]:single-instruction +; CHECK: Node Address:[[N9]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %cmp2 = icmp ult i64 %inc, %sub +; CHECK-NEXT: %add = fadd float %1, %3 ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N19:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N15]] -; CHECK: Node Address:[[N19]]:single-instruction +; CHECK: Node Address:[[N15]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: br i1 %cmp2, label %for.body4, label %for.inc12.loopexit +; CHECK-NEXT: store float %add, float* %arrayidx11, align 4 ; CHECK-NEXT: Edges:none! -; CHECK: Node Address:[[N20:0x[0-9a-f]*]]:single-instruction -; CHECK-NEXT: Instructions: -; CHECK-NEXT: %exitcond = icmp ne i64 %inc13, %n -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N21:0x[0-9a-f]*]] - -; CHECK: Node Address:[[N21]]:single-instruction +; CHECK: Node Address:[[N23:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: br i1 %exitcond, label %test2.for.cond1.preheader, label %for.end14.loopexit +; CHECK-NEXT: br label %for.inc12 ; CHECK-NEXT: Edges:none! ; CHECK: Node Address:[[N22:0x[0-9a-f]*]]:single-instruction @@ -361,50 +378,34 @@ for.end14: ; preds = %for.inc12, %entry ; CHECK-NEXT: br label %for.body4 ; CHECK-NEXT: Edges:none! -; CHECK: Node Address:[[N23:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: br label %for.inc12 -; CHECK-NEXT: Edges:none! +; CHECK-NEXT: %sub = add i64 %n, -1 +; CHECK-NEXT: Edges: +; CHECK-NEXT: [def-use] to [[N2]] +; CHECK-NEXT: [def-use] to [[N3:0x[0-9a-f]*]] -; CHECK: Node Address:[[N24:0x[0-9a-f]*]]:pi-block -; CHECK-NEXT:--- start of nodes in pi-block --- -; CHECK: Node Address:[[N25:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N3]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %inc13 = add i64 %i.04, 1 +; CHECK-NEXT: %cmp21 = icmp ult i64 1, %sub ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N26:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N4:0x[0-9a-f]*]] -; CHECK: Node Address:[[N26]]:single-instruction +; CHECK: Node Address:[[N4]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %i.04 = phi i64 [ %inc13, %for.inc12 ], [ 0, %test2.for.cond1.preheader.preheader ] -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N25]] -; CHECK-NEXT:--- end of nodes in pi-block --- -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N5]] -; CHECK-NEXT: [def-use] to [[N10]] -; CHECK-NEXT: [def-use] to [[N16]] -; CHECK-NEXT: [def-use] to [[N20]] +; CHECK-NEXT: br i1 %cmp21, label %for.body4.preheader, label %for.inc12 +; CHECK-NEXT: Edges:none! 
-; CHECK: Node Address:[[N27:0x[0-9a-f]*]]:pi-block -; CHECK-NEXT:--- start of nodes in pi-block --- -; CHECK: Node Address:[[N28:0x[0-9a-f]*]]:single-instruction +; CHECK: Node Address:[[N2]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %inc = add i64 %j.02, 1 +; CHECK-NEXT: %cmp2 = icmp ult i64 %inc, %sub ; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N29:0x[0-9a-f]*]] +; CHECK-NEXT: [def-use] to [[N19:0x[0-9a-f]*]] -; CHECK: Node Address:[[N29]]:single-instruction +; CHECK: Node Address:[[N19]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %j.02 = phi i64 [ %inc, %for.body4 ], [ 1, %for.body4.preheader ] -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N28]] -; CHECK-NEXT:--- end of nodes in pi-block --- -; CHECK-NEXT: Edges: -; CHECK-NEXT: [def-use] to [[N7]] -; CHECK-NEXT: [def-use] to [[N13]] -; CHECK-NEXT: [def-use] to [[N18]] -; CHECK-NEXT: [def-use] to [[N2]] +; CHECK-NEXT: br i1 %cmp2, label %for.body4, label %for.inc12.loopexit +; CHECK-NEXT: Edges:none! ;; This test has no cycles. diff --git a/llvm/test/Analysis/DDG/root-node.ll b/llvm/test/Analysis/DDG/root-node.ll index 34d6437ef9c01..868fb72d452a3 100644 --- a/llvm/test/Analysis/DDG/root-node.ll +++ b/llvm/test/Analysis/DDG/root-node.ll @@ -7,12 +7,11 @@ ; CHECK-NEXT: [rooted] to [[N1:0x[0-9a-f]*]] ; CHECK-NEXT: [rooted] to [[N2:0x[0-9a-f]*]] -; CHECK: Node Address:[[N1]]:pi-block -; CHECK: %i2.03 = phi i64 [ 0, %for.body.lr.ph ], [ %inc2, %test1.for.body ] - ; CHECK: Node Address:[[N2]]:pi-block ; CHECK: %i1.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %test1.for.body ] +; CHECK: Node Address:[[N1]]:pi-block +; CHECK: %i2.03 = phi i64 [ 0, %for.body.lr.ph ], [ %inc2, %test1.for.body ] ;; // Two separate components in the graph. Root node must link to both. 
;; void test1(unsigned long n, float * restrict a, float * restrict b) { diff --git a/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll b/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll index 059bbaa3c4e74..3b1c43df5a701 100755 --- a/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll +++ b/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll @@ -8,7 +8,7 @@ define void @test() #0 { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 40 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 39 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/Bindings/OCaml/bitwriter.ml b/llvm/test/Bindings/OCaml/bitwriter.ml index 28a61fee91b1b..17111bd3b51e0 100644 --- a/llvm/test/Bindings/OCaml/bitwriter.ml +++ b/llvm/test/Bindings/OCaml/bitwriter.ml @@ -17,7 +17,7 @@ let test x = if not x then exit 1 else () let read_file name = let ic = open_in_bin name in let len = in_channel_length ic in - let buf = String.create len in + let buf = Bytes.create len in test ((input ic buf 0 len) = len); @@ -46,4 +46,4 @@ let _ = test (file_buf = temp_bitcode m); test (file_buf = temp_bitcode ~unbuffered:false m); test (file_buf = temp_bitcode ~unbuffered:true m); - test (file_buf = Llvm.MemoryBuffer.as_string (Llvm_bitwriter.write_bitcode_to_memory_buffer m)) + test (file_buf = Bytes.of_string (Llvm.MemoryBuffer.as_string (Llvm_bitwriter.write_bitcode_to_memory_buffer m))) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-copy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-copy.mir new file mode 100644 index 0000000000000..d0e9fd5cd1a0b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-copy.mir @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - -march=aarch64 -run-pass=aarch64-prelegalizer-combiner %s | FileCheck %s + +# Make sure we don't lose the register bank constraints when +# combining COPY instructions. +--- +name: test_none_none +body: | + bb.0.entry: + ; CHECK-LABEL: name: test_none_none + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: $x0 = COPY [[COPY]](s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY %0(s64) + $x0 = COPY %1(s64) +... +--- +name: test_gpr_none +body: | + bb.0.entry: + ; CHECK-LABEL: name: test_gpr_none + ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 + ; CHECK: $x0 = COPY [[COPY]](s64) + %0:gpr(s64) = COPY $x0 + %1:_(s64) = COPY %0(s64) + $x0 = COPY %1(s64) +... +--- +name: test_none_gpr +body: | + bb.0.entry: + ; CHECK-LABEL: name: test_none_gpr + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr(s64) = COPY [[COPY]](s64) + ; CHECK: $x0 = COPY [[COPY1]](s64) + %0:_(s64) = COPY $x0 + %1:gpr(s64) = COPY %0(s64) + $x0 = COPY %1(s64) +... +--- +name: test_fpr_gpr +body: | + bb.0.entry: + ; CHECK-LABEL: name: test_fpr_gpr + ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr(s64) = COPY [[COPY]](s64) + ; CHECK: $x0 = COPY [[COPY1]](s64) + %0:fpr(s64) = COPY $x0 + %1:gpr(s64) = COPY %0(s64) + $x0 = COPY %1(s64) +... 
+--- +name: test_gpr64_gpr64_dst_no_llt +body: | + bb.0.entry: + ; CHECK-LABEL: name: test_gpr64_gpr64_dst_no_llt + ; CHECK: [[COPY:%[0-9]+]]:gpr64(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]](s64) + ; CHECK: $x0 = COPY [[COPY1]] + %0:gpr64(s64) = COPY $x0 + %1:gpr64 = COPY %0(s64) + $x0 = COPY %1 +... +--- +name: test_gpr64_gpr64_src_no_llt +body: | + bb.0.entry: + ; CHECK-LABEL: name: test_gpr64_gpr64_src_no_llt + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64(s64) = COPY [[COPY]] + ; CHECK: $x0 = COPY [[COPY1]](s64) + %0:gpr64 = COPY $x0 + %1:gpr64(s64) = COPY %0 + $x0 = COPY %1(s64) +... +--- +name: test_gpr64_gpr64_both_no_llt +body: | + bb.0.entry: + ; CHECK-LABEL: name: test_gpr64_gpr64_both_no_llt + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: $x0 = COPY [[COPY]] + %0:gpr64 = COPY $x0 + %1:gpr64 = COPY %0 + $x0 = COPY %1 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir new file mode 100644 index 0000000000000..2ee372ada08be --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir @@ -0,0 +1,22 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=aarch64 -run-pass=legalizer %s -o - -verify-machineinstrs | FileCheck %s +--- +name: test_load_trunc +stack: + - { id: 0, type: default, offset: 0, size: 2, + alignment: 2, stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.0.entry: + + ; CHECK-LABEL: name: test_load_trunc + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (load 2) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s10) = G_TRUNC [[LOAD]](s16) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[TRUNC]](s10) + ; CHECK: RET_ReallyLR implicit [[TRUNC1]](s1) + %0:_(p0) = G_FRAME_INDEX %stack.0 + %1:_(s10) = G_LOAD %0(p0) :: (load 2) + %2:_(s1) = G_TRUNC %1(s10) + RET_ReallyLR implicit %2(s1) +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir index 587b519554a71..01e6bd820efbc 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir @@ -1433,8 +1433,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[MLAv8i8_:%[0-9]+]]:fpr64 = MLAv8i8 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLAv8i8_]] + ; CHECK: [[MULv8i8_:%[0-9]+]]:fpr64 = MULv8i8 [[COPY1]], [[COPY]] + ; CHECK: [[ADDv8i8_:%[0-9]+]]:fpr64 = ADDv8i8 [[MULv8i8_]], [[COPY2]] + ; CHECK: $noreg = PATCHABLE_RET [[ADDv8i8_]] %4:fpr(<8 x s8>) = COPY $d2 %3:fpr(<8 x s8>) = COPY $d1 %2:fpr(<8 x s8>) = COPY $d0 @@ -1468,8 +1469,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[MLAv16i8_:%[0-9]+]]:fpr128 = MLAv16i8 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLAv16i8_]] + ; CHECK: [[MULv16i8_:%[0-9]+]]:fpr128 = MULv16i8 [[COPY1]], [[COPY]] + ; CHECK: [[ADDv16i8_:%[0-9]+]]:fpr128 = ADDv16i8 [[MULv16i8_]], [[COPY2]] + ; CHECK: $noreg = PATCHABLE_RET [[ADDv16i8_]] %4:fpr(<16 x s8>) = COPY $q2 %3:fpr(<16 x s8>) = COPY $q1 %2:fpr(<16 x s8>) = COPY $q0 @@ -1503,8 +1505,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[MLAv4i16_:%[0-9]+]]:fpr64 = MLAv4i16 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLAv4i16_]] + ; CHECK: [[MULv4i16_:%[0-9]+]]:fpr64 = MULv4i16 [[COPY1]], [[COPY]] + ; CHECK: [[ADDv4i16_:%[0-9]+]]:fpr64 = ADDv4i16 [[MULv4i16_]], [[COPY2]] + ; CHECK: $noreg = PATCHABLE_RET [[ADDv4i16_]] %4:fpr(<4 x s16>) = COPY $d2 %3:fpr(<4 x s16>) = COPY $d1 %2:fpr(<4 x s16>) = COPY $d0 @@ -1538,8 +1541,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[MLAv8i16_:%[0-9]+]]:fpr128 = MLAv8i16 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLAv8i16_]] + ; CHECK: [[MULv8i16_:%[0-9]+]]:fpr128 = MULv8i16 [[COPY1]], [[COPY]] + ; CHECK: [[ADDv8i16_:%[0-9]+]]:fpr128 = ADDv8i16 [[MULv8i16_]], [[COPY2]] + ; CHECK: $noreg = PATCHABLE_RET [[ADDv8i16_]] %4:fpr(<8 x s16>) = COPY $q2 %3:fpr(<8 x s16>) = COPY $q1 %2:fpr(<8 x s16>) = COPY $q0 @@ -1759,8 +1763,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[MLAv8i8_:%[0-9]+]]:fpr64 = MLAv8i8 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLAv8i8_]] + ; CHECK: [[MULv8i8_:%[0-9]+]]:fpr64 = MULv8i8 [[COPY1]], [[COPY]] + ; CHECK: [[ADDv8i8_:%[0-9]+]]:fpr64 = ADDv8i8 [[COPY2]], [[MULv8i8_]] + ; CHECK: $noreg = PATCHABLE_RET [[ADDv8i8_]] %4:fpr(<8 x s8>) = COPY $d2 %3:fpr(<8 x s8>) = COPY $d1 %2:fpr(<8 x s8>) = COPY $d0 @@ -1794,8 +1799,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[MLAv16i8_:%[0-9]+]]:fpr128 = MLAv16i8 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLAv16i8_]] + ; CHECK: [[MULv16i8_:%[0-9]+]]:fpr128 = MULv16i8 [[COPY1]], 
[[COPY]] + ; CHECK: [[ADDv16i8_:%[0-9]+]]:fpr128 = ADDv16i8 [[COPY2]], [[MULv16i8_]] + ; CHECK: $noreg = PATCHABLE_RET [[ADDv16i8_]] %4:fpr(<16 x s8>) = COPY $q2 %3:fpr(<16 x s8>) = COPY $q1 %2:fpr(<16 x s8>) = COPY $q0 @@ -1829,8 +1835,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[MLAv4i16_:%[0-9]+]]:fpr64 = MLAv4i16 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLAv4i16_]] + ; CHECK: [[MULv4i16_:%[0-9]+]]:fpr64 = MULv4i16 [[COPY1]], [[COPY]] + ; CHECK: [[ADDv4i16_:%[0-9]+]]:fpr64 = ADDv4i16 [[COPY2]], [[MULv4i16_]] + ; CHECK: $noreg = PATCHABLE_RET [[ADDv4i16_]] %4:fpr(<4 x s16>) = COPY $d2 %3:fpr(<4 x s16>) = COPY $d1 %2:fpr(<4 x s16>) = COPY $d0 @@ -1864,8 +1871,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[MLAv8i16_:%[0-9]+]]:fpr128 = MLAv8i16 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLAv8i16_]] + ; CHECK: [[MULv8i16_:%[0-9]+]]:fpr128 = MULv8i16 [[COPY1]], [[COPY]] + ; CHECK: [[ADDv8i16_:%[0-9]+]]:fpr128 = ADDv8i16 [[COPY2]], [[MULv8i16_]] + ; CHECK: $noreg = PATCHABLE_RET [[ADDv8i16_]] %4:fpr(<8 x s16>) = COPY $q2 %3:fpr(<8 x s16>) = COPY $q1 %2:fpr(<8 x s16>) = COPY $q0 @@ -2085,8 +2093,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[MLSv8i8_:%[0-9]+]]:fpr64 = MLSv8i8 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLSv8i8_]] + ; CHECK: [[MULv8i8_:%[0-9]+]]:fpr64 = MULv8i8 [[COPY1]], [[COPY]] + ; CHECK: [[SUBv8i8_:%[0-9]+]]:fpr64 = SUBv8i8 [[COPY2]], [[MULv8i8_]] + ; CHECK: $noreg = PATCHABLE_RET [[SUBv8i8_]] %4:fpr(<8 x s8>) = COPY $d2 %3:fpr(<8 x s8>) = COPY $d1 %2:fpr(<8 x s8>) = COPY $d0 @@ -2120,8 +2129,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[MLSv16i8_:%[0-9]+]]:fpr128 = MLSv16i8 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLSv16i8_]] + ; CHECK: [[MULv16i8_:%[0-9]+]]:fpr128 = MULv16i8 [[COPY1]], [[COPY]] + ; CHECK: [[SUBv16i8_:%[0-9]+]]:fpr128 = SUBv16i8 [[COPY2]], [[MULv16i8_]] + ; CHECK: $noreg = PATCHABLE_RET [[SUBv16i8_]] %4:fpr(<16 x s8>) = COPY $q2 %3:fpr(<16 x s8>) = COPY $q1 %2:fpr(<16 x s8>) = COPY $q0 @@ -2155,8 +2165,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[MLSv4i16_:%[0-9]+]]:fpr64 = MLSv4i16 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLSv4i16_]] + ; CHECK: [[MULv4i16_:%[0-9]+]]:fpr64 = MULv4i16 [[COPY1]], [[COPY]] + ; CHECK: [[SUBv4i16_:%[0-9]+]]:fpr64 = SUBv4i16 [[COPY2]], [[MULv4i16_]] + ; CHECK: $noreg = PATCHABLE_RET [[SUBv4i16_]] %4:fpr(<4 x s16>) = COPY $d2 %3:fpr(<4 x s16>) = COPY $d1 %2:fpr(<4 x s16>) = COPY $d0 @@ -2190,8 +2201,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[MLSv8i16_:%[0-9]+]]:fpr128 = MLSv8i16 [[COPY2]], [[COPY1]], [[COPY]] - ; CHECK: $noreg = PATCHABLE_RET [[MLSv8i16_]] + ; CHECK: [[MULv8i16_:%[0-9]+]]:fpr128 = MULv8i16 [[COPY1]], [[COPY]] + ; CHECK: [[SUBv8i16_:%[0-9]+]]:fpr128 = SUBv8i16 [[COPY2]], [[MULv8i16_]] + ; CHECK: $noreg = PATCHABLE_RET 
[[SUBv8i16_]] %4:fpr(<8 x s16>) = COPY $q2 %3:fpr(<8 x s16>) = COPY $q1 %2:fpr(<8 x s16>) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll index b0a4256552726..ec3b51bd37a8d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -mcpu=cyclone -enable-misched=false | FileCheck %s ; rdar://13625505 @@ -5,15 +6,25 @@ ; varargs start right after at 8-byte alignment. define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp { ; CHECK-LABEL: fn9: -; 9th fixed argument -; CHECK: ldr {{w[0-9]+}}, [sp, #64] -; CHECK-DAG: add [[ARGS:x[0-9]+]], sp, #72 -; First vararg -; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #72] -; Second vararg -; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #80] -; Third vararg -; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #88] +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #64 ; =64 +; CHECK-NEXT: ldr w8, [sp, #64] +; CHECK-NEXT: stp w2, w1, [sp, #52] +; CHECK-NEXT: stp w4, w3, [sp, #44] +; CHECK-NEXT: stp w6, w5, [sp, #36] +; CHECK-NEXT: str w7, [sp, #32] +; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: str w8, [sp, #20] +; CHECK-NEXT: ldr w8, [sp, #80] +; CHECK-NEXT: str w8, [sp, #16] +; CHECK-NEXT: add x8, sp, #72 ; =72 +; CHECK-NEXT: add x8, x8, #24 ; =24 +; CHECK-NEXT: str x8, [sp, #24] +; CHECK-NEXT: ldr w8, [sp, #88] +; CHECK-NEXT: str w8, [sp, #12] +; CHECK-NEXT: add sp, sp, #64 ; =64 +; CHECK-NEXT: ret %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -51,9 +62,47 @@ declare void @llvm.va_start(i8*) nounwind define i32 @main() nounwind ssp { ; CHECK-LABEL: main: -; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK: str {{x[0-9]+}}, [sp, #8] -; CHECK: str {{w[0-9]+}}, [sp] +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #96 ; =96 +; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str w8, [sp, #76] +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: str w8, [sp, #72] +; CHECK-NEXT: mov w8, #3 +; CHECK-NEXT: str w8, [sp, #68] +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: str w8, [sp, #64] +; CHECK-NEXT: mov w8, #5 +; CHECK-NEXT: str w8, [sp, #60] +; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: str w8, [sp, #56] +; CHECK-NEXT: mov w8, #7 +; CHECK-NEXT: str w8, [sp, #52] +; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: str w8, [sp, #48] +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: mov w9, #10 +; CHECK-NEXT: stp w9, w8, [sp, #40] +; CHECK-NEXT: mov w10, #11 +; CHECK-NEXT: mov w11, #12 +; CHECK-NEXT: stp w11, w10, [sp, #32] +; CHECK-NEXT: stp x10, x11, [sp, #16] +; CHECK-NEXT: str x9, [sp, #8] +; CHECK-NEXT: str w8, [sp] +; CHECK-NEXT: add x0, sp, #76 ; =76 +; CHECK-NEXT: mov w1, #2 +; CHECK-NEXT: mov w2, #3 +; CHECK-NEXT: mov w3, #4 +; CHECK-NEXT: mov w4, #5 +; CHECK-NEXT: mov w5, #6 +; CHECK-NEXT: mov w6, #7 +; CHECK-NEXT: mov w7, #8 +; CHECK-NEXT: bl _fn9 +; CHECK-NEXT: mov w0, #0 +; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 ; =96 +; CHECK-NEXT: ret %a1 = alloca i32, align 4 %a2 = alloca i32, align 4 %a3 = alloca i32, align 4 @@ -97,12 +146,20 @@ define i32 @main() nounwind ssp { ;rdar://13668483 @.str = private unnamed_addr constant [4 x i8] c"fmt\00", align 1 define void @foo(i8* %fmt, ...) 
nounwind { -entry: ; CHECK-LABEL: foo: -; CHECK: ldr {{w[0-9]+}}, [sp, #48] -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #23 -; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0 -; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]] +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #48 ; =48 +; CHECK-NEXT: ldr w8, [sp, #48] +; CHECK-NEXT: str w8, [sp, #28] +; CHECK-NEXT: add x8, sp, #48 ; =48 +; CHECK-NEXT: add x8, x8, #23 ; =23 +; CHECK-NEXT: and x8, x8, #0xfffffffffffffff0 +; CHECK-NEXT: add x9, x8, #16 ; =16 +; CHECK-NEXT: stp x9, x0, [sp, #32] +; CHECK-NEXT: ldr q0, [x8] +; CHECK-NEXT: str q0, [sp], #48 +; CHECK-NEXT: ret +entry: %fmt.addr = alloca i8*, align 8 %args = alloca i8*, align 8 %vc = alloca i32, align 4 @@ -118,10 +175,24 @@ entry: } define void @bar(i32 %x, <4 x i32> %y) nounwind { -entry: ; CHECK-LABEL: bar: -; CHECK: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #16] -; CHECK: str {{x[0-9]+}}, [sp] +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #80 ; =80 +; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill +; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: str w0, [sp, #60] +; CHECK-NEXT: stp q0, q0, [sp, #16] +; CHECK-NEXT: str x0, [sp] +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x0, l_.str@PAGE +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: add x0, x0, l_.str@PAGEOFF +; CHECK-NEXT: bl _foo +; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 ; =80 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1 +entry: %x.addr = alloca i32, align 4 %y.addr = alloca <4 x i32>, align 16 store i32 %x, i32* %x.addr, align 4 @@ -137,12 +208,20 @@ entry: ; side is 16-byte aligned on stack. %struct.s41 = type { i32, i16, i32, i16 } define void @foo2(i8* %fmt, ...) nounwind { -entry: ; CHECK-LABEL: foo2: -; CHECK: ldr {{w[0-9]+}}, [sp, #48] -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #23 -; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0 -; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]] +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #48 ; =48 +; CHECK-NEXT: ldr w8, [sp, #48] +; CHECK-NEXT: str w8, [sp, #28] +; CHECK-NEXT: add x8, sp, #48 ; =48 +; CHECK-NEXT: add x8, x8, #23 ; =23 +; CHECK-NEXT: and x8, x8, #0xfffffffffffffff0 +; CHECK-NEXT: add x9, x8, #16 ; =16 +; CHECK-NEXT: stp x9, x0, [sp, #32] +; CHECK-NEXT: ldr q0, [x8] +; CHECK-NEXT: str q0, [sp], #48 +; CHECK-NEXT: ret +entry: %fmt.addr = alloca i8*, align 8 %args = alloca i8*, align 8 %vc = alloca i32, align 4 @@ -168,10 +247,25 @@ entry: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind define void @bar2(i32 %x, i128 %s41.coerce) nounwind { -entry: ; CHECK-LABEL: bar2: -; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK: str {{x[0-9]+}}, [sp] +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #80 ; =80 +; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill +; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: str w0, [sp, #60] +; CHECK-NEXT: stp x1, x2, [sp, #32] +; CHECK-NEXT: stp x1, x2, [sp, #16] +; CHECK-NEXT: str x0, [sp] +; CHECK-NEXT: Lloh2: +; CHECK-NEXT: adrp x0, l_.str@PAGE +; CHECK-NEXT: Lloh3: +; CHECK-NEXT: add x0, x0, l_.str@PAGEOFF +; CHECK-NEXT: bl _foo2 +; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 ; =80 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpAdd Lloh2, Lloh3 +entry: %x.addr = alloca i32, align 4 %s41 = alloca %struct.s41, align 16 store i32 %x, i32* %x.addr, align 4 diff --git 
a/llvm/test/CodeGen/AArch64/arm64-never-combine-csr-local-stack-bump-for-size.ll b/llvm/test/CodeGen/AArch64/arm64-never-combine-csr-local-stack-bump-for-size.ll
deleted file mode 100644
index 273fb31e16c3b..0000000000000
--- a/llvm/test/CodeGen/AArch64/arm64-never-combine-csr-local-stack-bump-for-size.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s
-
-; CHECK-LABEL: main:
-; CHECK: stp x29, x30, [sp, #-16]!
-; CHECK-NEXT: stp xzr, xzr, [sp, #-16]!
-; CHECK: adrp x0, l_.str@PAGE
-; CHECK: add x0, x0, l_.str@PAGEOFF
-; CHECK-NEXT: bl _puts
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x29, x30, [sp], #16
-; CHECK-NEXT: ret
-
-@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00"
-
-define i32 @main() nounwind ssp optsize {
-entry:
- %local1 = alloca i64, align 8
- %local2 = alloca i64, align 8
- store i64 0, i64* %local1
- store i64 0, i64* %local2
- %call = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0))
- ret i32 %call
-}
-
-declare i32 @puts(i8*)
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll
new file mode 100644
index 0000000000000..820d08bd94b4d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll
@@ -0,0 +1,80 @@
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple \
+; RUN: aarch64-arm-none-eabi %s -o - | FileCheck %s
+
+; Function a's outlining candidate contains an sp-modifying add without a
+; corresponding sub, so we shouldn't outline it.
+define void @a() "sign-return-address"="all" "sign-return-address-key"="b_key" {
+; CHECK-LABEL: a: // @a
+; CHECK: // %bb.0:
+; CHECK-NEXT: .cfi_b_key_frame
+; CHECK-NEXT: pacibsp
+; CHECK-NEXT: .cfi_negate_ra_state
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ %6 = alloca i32, align 4
+ store i32 1, i32* %1, align 4
+ store i32 2, i32* %2, align 4
+ store i32 3, i32* %3, align 4
+ store i32 4, i32* %4, align 4
+ store i32 5, i32* %5, align 4
+ store i32 6, i32* %6, align 4
+; CHECK-NOT: bl OUTLINED_FUNCTION_{{[0-9]+}}
+; CHECK: autibsp
+; CHECK-NEXT: ret
+ ret void
+}
+
+define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" nounwind {
+; CHECK-LABEL: b: // @b
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: pacibsp
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ %6 = alloca i32, align 4
+ store i32 1, i32* %1, align 4
+ store i32 2, i32* %2, align 4
+ store i32 3, i32* %3, align 4
+ store i32 4, i32* %4, align 4
+ store i32 5, i32* %5, align 4
+ store i32 6, i32* %6, align 4
+; CHECK: bl [[OUTLINED_FUNC:OUTLINED_FUNCTION_[0-9]+]]
+; CHECK: autibsp
+; CHECK-NEXT: ret
+ ret void
+}
+
+define void @c() "sign-return-address"="all" "sign-return-address-key"="b_key" nounwind {
+; CHECK-LABEL: c: // @c
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: pacibsp
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ %6 = alloca i32, align 4
+ store i32 1, i32* %1, align 4
+ store i32 2, i32* %2, align 4
+ store i32 3, i32* %3, align 4
+ store i32 4, i32* %4, align 4
+ store i32 5, i32* %5, align 4
+ store i32 6, i32* %6, align 4
+; CHECK: bl [[OUTLINED_FUNC]]
+; CHECK: autibsp
+; CHECK-NEXT: ret
+ ret void
+}
+
+; CHECK: [[OUTLINED_FUNC]]
+; 
CHECK: // %bb.0: +; CHECK-NEXT: .cfi_b_key_frame +; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state +; CHECK: autibsp +; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll new file mode 100644 index 0000000000000..d8acaa9cbfd8e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll @@ -0,0 +1,68 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple \ +; RUN: aarch64-arm-none-eabi %s -o - | FileCheck %s + +define void @a() "sign-return-address"="all" { +; CHECK-LABEL: a: // @a +; CHECK: paciasp +; CHECK-NEXT: .cfi_negate_ra_state + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: autiasp + ret void +; CHECK: .cfi_endproc +} + +define void @b() "sign-return-address"="non-leaf" { +; CHECK-LABEL: b: // @b +; CHECK-NOT: paciasp +; CHECK-NOT: .cfi_negate_ra_state + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK-NOT: autiasp + ret void +; CHECK: .cfi_endproc +} + +define void @c() "sign-return-address"="all" { +; CHECK-LABEL: c: // @c +; CHECK: paciasp +; CHECK-NEXT: .cfi_negate_ra_state + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: autiasp + ret void +; CHECK: .cfi_endproc +} + +; CHECK-NOT: OUTLINED_FUNCTION_{{[0-9]+}}: +; CHECK-NOT: // -- Begin function diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll new file mode 100644 index 0000000000000..c7cea17e7cf2d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll @@ -0,0 +1,72 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple \ +; RUN: aarch64-arm-none-eabi %s -o - | FileCheck %s + +define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" { +; CHECK-LABEL: a: // @a +; CHECK: .cfi_b_key_frame +; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + call void asm sideeffect "mov x30, $0", "r,~{lr}"(i64 %x) #1 + ret i64 %x +} + +define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" { +; CHECK-LABEL: b: // @b +; CHECK: .cfi_b_key_frame +; CHECK-NEXT: 
pacibsp +; CHECK-NEXT: .cfi_negate_ra_state + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + call void asm sideeffect "mov x30, $0", "r,~{lr}"(i64 %x) #1 + ret i64 %x +} + +define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" { +; CHECK-LABEL: c: // @c +; CHECK: .cfi_b_key_frame +; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + call void asm sideeffect "mov x30, $0", "r,~{lr}"(i64 %x) #1 + ret i64 %x +} + +; Outlined function is leaf-function => don't sign it +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK-NOT: .cfi_b_key_frame +; CHECK-NOT: paci{{[a,b]}}sp +; CHECK-NOT: .cfi_negate_ra_state +; CHECK-NOT: auti{{[a,b]}}sp diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir new file mode 100644 index 0000000000000..e65adce5c1b4e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir @@ -0,0 +1,127 @@ +# RUN: llc -mtriple=aarch64-arm-none-eabi -run-pass=prologepilog \ +# RUN: -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s + +# Check that we save LR to a callee-saved register when possible. +# foo() should use a callee-saved register. However, bar() should not. +--- | + + define void @foo() #0 { + ret void + } + + define void @bar() #0 { + ret void + } + + attributes #0 = { nounwind "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" minsize noinline noredzone "no-frame-pointer-elim"="true" } +... +--- +# CHECK-LABEL: name: foo +# CHECK: bb.0: +# CHECK: frame-setup EMITBKEY +# CHECK-NEXT: frame-setup PACIBSP +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state +# CHECK: bb.1: +# CHECK: BL @[[OUTLINED_FUNCTION:OUTLINED_FUNCTION_[0-9]+]] +# CHECK: bb.2: +# CHECK: BL @[[OUTLINED_FUNCTION]] +# CHECK: bb.3: +# CHECK: BL @[[OUTLINED_FUNCTION]] +# CHECK: bb.4: +# CHECK: BL @[[OUTLINED_FUNCTION]] +# CHECK: bb.5: +# CHECK: frame-destroy AUTIBSP +# CHECK-NEXT: RET +name: foo +tracksRegLiveness: true +fixedStack: +body: | + bb.0: + $x25 = ORRXri $xzr, 1 + $lr = ORRXri $xzr, 1 + bb.1: + liveins: $lr, $w9 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 2 + bb.2: + liveins: $lr, $w9 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 2 + bb.3: + liveins: $lr, $w9 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 2 + bb.4: + liveins: $lr, $w9 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 1 + $w9 = ORRWri $wzr, 2 + bb.5: + liveins: $w9 + RET undef $lr + +... 
+---
+# CHECK: name: bar
+# CHECK: bb.0:
+# CHECK-NOT: OUTLINED_FUNCTION_
+# CHECK: bb.1:
+# CHECK-NOT: OUTLINED_FUNCTION_
+# CHECK: bb.2:
+# CHECK-NOT: OUTLINED_FUNCTION_
+# CHECK: bb.3:
+# CHECK-NOT: OUTLINED_FUNCTION_
+# CHECK: RET
+name: bar
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w12 = ORRWri $wzr, 2
+ bb.1:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w12 = ORRWri $wzr, 2
+ bb.2:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w12 = ORRWri $wzr, 2
+ bb.3:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ RET undef $lr
+
+# CHECK: name: [[OUTLINED_FUNCTION]]
+# CHECK: body:
+# CHECK-NEXT: bb.0:
+# CHECK-NOT: frame-setup EMITBKEY
+# CHECK-NOT: frame-setup PACI{{[A,B]}}SP
+# CHECK-NOT: frame-setup CFI_INSTRUCTION negate_ra_sign_state
+# CHECK-NOT: frame-destroy AUTI{{[A,B]}}SP
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll
new file mode 100644
index 0000000000000..4348d73743067
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll
@@ -0,0 +1,69 @@
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple \
+; RUN: aarch64-arm-none-eabi %s -o - | FileCheck %s
+
+define void @a() "sign-return-address"="all" {
+; CHECK-LABEL: a: // @a
+; CHECK: paciasp
+; CHECK-NEXT: .cfi_negate_ra_state
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ %6 = alloca i32, align 4
+ store i32 1, i32* %1, align 4
+ store i32 2, i32* %2, align 4
+ store i32 3, i32* %3, align 4
+ store i32 4, i32* %4, align 4
+ store i32 5, i32* %5, align 4
+ store i32 6, i32* %6, align 4
+; CHECK: autiasp
+ ret void
+; CHECK: .cfi_endproc
+}
+
+define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" {
+; CHECK-LABEL: b: // @b
+; CHECK: .cfi_b_key_frame
+; CHECK-NEXT: pacibsp
+; CHECK-NEXT: .cfi_negate_ra_state
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ %6 = alloca i32, align 4
+ store i32 1, i32* %1, align 4
+ store i32 2, i32* %2, align 4
+ store i32 3, i32* %3, align 4
+ store i32 4, i32* %4, align 4
+ store i32 5, i32* %5, align 4
+ store i32 6, i32* %6, align 4
+; CHECK-NOT: autiasp
+ ret void
+; CHECK: .cfi_endproc
+}
+
+define void @c() "sign-return-address"="all" {
+; CHECK-LABEL: c: // @c
+; CHECK: paciasp
+; CHECK-NEXT: .cfi_negate_ra_state
+ %1 = alloca i32, align 4
+ %2 = alloca i32, 
align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: autiasp + ret void +; CHECK: .cfi_endproc +} + +; CHECK-NOT: OUTLINED_FUNCTION_0: +; CHECK-NOT: // -- Begin function diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-same-key-a.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-same-key-a.ll new file mode 100644 index 0000000000000..f5e229a20ef28 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-same-key-a.ll @@ -0,0 +1,64 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple \ +; RUN: aarch64-arm-none-eabi %s -o - | FileCheck %s + +define void @a() "sign-return-address"="all" "sign-return-address-key"="a_key" nounwind { +; CHECK-LABEL: a: // @a +; CHECK: paciasp + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: autiasp + ret void +} + +define void @b() "sign-return-address"="all" nounwind { +; CHECK-LABEL: b: // @b +; CHECK: paciasp + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: autiasp + ret void +} + +define void @c() "sign-return-address"="all" nounwind { +; CHECK-LABEL: c: // @c +; CHECK: paciasp + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: autiasp + ret void +} + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK: paciasp +; CHECK: autiasp +; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-same-key-b.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-same-key-b.ll new file mode 100644 index 0000000000000..c1940b44d2dad --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-same-key-b.ll @@ -0,0 +1,70 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple \ +; RUN: aarch64-arm-none-eabi %s -o - | FileCheck %s + +define void @a() "sign-return-address"="all" "sign-return-address-key"="b_key" nounwind { +; CHECK-LABEL: a: // @a +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: pacibsp + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: autibsp + ret void +} + +define void @b() 
"sign-return-address"="all" "sign-return-address-key"="b_key" nounwind { +; CHECK-LABEL: b: // @b +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: pacibsp + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: autibsp + ret void +} + +define void @c() "sign-return-address"="all" "sign-return-address-key"="b_key" nounwind { +; CHECK-LABEL: c: // @c +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: pacibsp + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: autibsp + ret void +} + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK: // %bb.0: +; CHECK-NEXT: .cfi_b_key_frame +; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state +; CHECK: autibsp +; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir new file mode 100644 index 0000000000000..2645a6553ffd8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir @@ -0,0 +1,204 @@ +# RUN: llc -verify-machineinstrs -run-pass=machine-outliner %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-arm-linux-gnu" + + @v = common dso_local global i32* null, align 8 + + ; Function Attrs: nounwind + define dso_local void @legal0() #0 { + %1 = alloca i32, align 4 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + ret void + } + + ; Function Attrs: nounwind + define dso_local void @legal1() #0 { + %1 = alloca i32, align 4 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + ret void + } + + ; Function Attrs: nounwind + define dso_local void @illegal0() #0 { + %1 = alloca i32, align 4 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + ret void + } + + ; Function Attrs: nounwind + define dso_local void @illegal1() #0 { + %1 = alloca i32, align 4 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + store volatile i32* %1, i32** @v, align 8 + ret void + } + + attributes #0 = { 
nounwind "sign-return-address"="all" "sign-return-address-key"="a_key" noinline noredzone "no-frame-pointer-elim"="true" } + +... +--- +name: legal0 +tracksRegLiveness: true +body: | + bb.0 (%ir-block.0): + liveins: $lr + frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp + frame-setup CFI_INSTRUCTION negate_ra_sign_state + $sp = frame-setup SUBXri $sp, 16, 0 + renamable $x8 = ADRP target-flags(aarch64-page) @v + $x9 = ADDXri $sp, 12, 0 + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + $sp = frame-destroy ADDXri $sp, 16, 0 + frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp + RET undef $lr + +# CHECK-LABEL: name: legal0 +# CHECK: body: | +# CHECK-NEXT: bb.0 (%ir-block.0): +# CHECK-NEXT: liveins: $lr +# CHECK: frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state +# CHECK: BL @[[OUTLINED_FUNC:OUTLINED_FUNCTION_[0-9]+]] +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp +# CHECK-NEXT: RET undef $lr + +... +--- +name: legal1 +tracksRegLiveness: true +body: | + bb.0 (%ir-block.0): + liveins: $lr + frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp + frame-setup CFI_INSTRUCTION negate_ra_sign_state + $sp = frame-setup SUBXri $sp, 16, 0 + renamable $x8 = ADRP target-flags(aarch64-page) @v + $x9 = ADDXri $sp, 12, 0 + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + $sp = frame-destroy ADDXri $sp, 16, 0 + frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp + RET undef $lr + +# CHECK-LABEL: name: legal1 +# CHECK: body: | +# CHECK-NEXT: bb.0 (%ir-block.0): +# CHECK-NEXT: liveins: $lr +# CHECK: frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state +# CHECK: BL @[[OUTLINED_FUNC]] +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp +# CHECK-NEXT: RET undef $lr + +... 
+--- +name: illegal0 +tracksRegLiveness: true +body: | + bb.0 (%ir-block.0): + liveins: $lr + frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp + frame-setup CFI_INSTRUCTION negate_ra_sign_state + $sp = frame-setup SUBXri $sp, 16, 0 + renamable $x8 = ADRP target-flags(aarch64-page) @v + $x9 = ADDXri $sp, 12, 0 + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + $sp = frame-destroy ADDXri $sp, 12, 0 + frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp + RET undef $lr + +... +--- +name: illegal1 +tracksRegLiveness: true +body: | + bb.0 (%ir-block.0): + liveins: $lr + frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp + frame-setup CFI_INSTRUCTION negate_ra_sign_state + $sp = frame-setup SUBXri $sp, 16, 0 + renamable $x8 = ADRP target-flags(aarch64-page) @v + $x9 = ADDXri $sp, 12, 0 + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + $sp = frame-destroy ADDXri $sp, 12, 0 + frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp + RET undef $lr + +# CHECK-LABEL: name: illegal0 +# CHECK: body: | +# CHECK-NEXT: bb.0 (%ir-block.0): +# CHECK-NEXT: liveins: $lr +# CHECK: frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state +# CHECK-NOT: BL @OUTLINED_FUNCTION_{{.*}} +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp +# CHECK-NEXT: RET undef $lr + +# CHECK-LABEL: name: illegal1 +# CHECK: body: | +# CHECK-NEXT: bb.0 (%ir-block.0): +# CHECK-NEXT: liveins: $lr +# CHECK: frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state +# CHECK-NOT: BL @OUTLINED_FUNCTION_{{.*}} +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp +# CHECK-NEXT: RET undef $lr + +# Outlined function that contains only legal sp modifications +# CHECK: name: [[OUTLINED_FUNC]] +# CHECK: body: | +# CHECK-NEXT: bb.0: +# CHECK-NEXT: 
frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK: $sp = frame-destroy ADDXri $sp, 16, 0 +# CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: RET undef $lr diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll new file mode 100644 index 0000000000000..c2bb291506ae2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll @@ -0,0 +1,87 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple \ +; RUN: aarch64-arm-linux-gnu %s -o - | FileCheck %s + +; Check that functions that should sign their return addresses are not +; outlined unless either all of the functions support v8.3a features or none +; of them do. + +define void @a() #0 { +; CHECK-LABEL: a: // @a +; CHECK: // %bb.0: +; CHECK-NEXT: .cfi_b_key_frame +; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state +; CHECK-NOT: OUTLINED_FUNCTION_ + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: retab +; CHECK-NOT: auti[a,b]sp + ret void +} + +define void @b() #0 { +; CHECK-LABEL: b: // @b +; CHECK: // %bb.0: +; CHECK-NEXT: .cfi_b_key_frame +; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state +; CHECK-NOT: OUTLINED_FUNCTION_ + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: retab +; CHECK-NOT: auti[a,b]sp + ret void +} + +define void @c() #1 { +; CHECK-LABEL: c: // @c +; CHECK: // %bb.0: +; CHECK-NEXT: .cfi_b_key_frame +; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state +; CHECK-NOT: OUTLINED_FUNCTION_ + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: autibsp +; CHECK-NOT: ret{{[a,b]}} + ret void +} + +attributes #0 = { "sign-return-address"="all" + "sign-return-address-key"="b_key" + "target-features"="+v8.3a" } + +attributes #1 = { "sign-return-address"="all" + "sign-return-address-key"="b_key" } + +; CHECK-NOT: OUTLINED_FUNCTION_ diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll new file mode 100644 index 0000000000000..d76dc5ef8a8e7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll @@ -0,0 +1,63 @@ +; RUN: llc -mtriple aarch64-arm-linux-gnu --enable-machine-outliner \ +; RUN: -verify-machineinstrs %s -o - | FileCheck %s + +declare i32 @thunk_called_fn(i32, i32, i32, i32) + +define i32 @a() #0 { +; CHECK-LABEL: a: // @a +; CHECK: //
%bb.0: // %entry +; CHECK-NEXT: paciasp +; CHECK: autiasp +; CHECK-NEXT: ret +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 8 + ret i32 %cx +} + +define i32 @b() #0 { +; CHECK-LABEL: b: // @b +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: paciasp +; CHECK-NEXT: .cfi_negate_ra_state +; CHECK: autiasp +; CHECK-NEXT: ret +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 88 + ret i32 %cx +} + +define hidden i32 @c(i32 (i32, i32, i32, i32)* %fptr) #0 { +; CHECK-LABEL: c: // @c +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: paciasp +; CHECK-NEXT: .cfi_negate_ra_state +; CHECK: autiasp +; CHECK-NEXT: ret +entry: + %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) + %add = add nsw i32 %call, 8 + ret i32 %add +} + +define hidden i32 @d(i32 (i32, i32, i32, i32)* %fptr) #0 { +; CHECK-LABEL: d: // @d +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: paciasp +; CHECK-NEXT: .cfi_negate_ra_state +; CHECK: autiasp +; CHECK-NEXT: ret +entry: + %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) + %add = add nsw i32 %call, 88 + ret i32 %add +} + +attributes #0 = { "sign-return-address"="non-leaf" } + +; CHECK-NOT: [[OUTLINED_FUNCTION_{{.*}}]] +; CHECK-NOT: .cfi_b_key_frame +; CHECK-NOT: paci{{[a,b]}}sp +; CHECK-NOT: .cfi_negate_ra_state +; CHECK-NOT: auti{{[a,b]}}sp diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-v8-3.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-v8-3.ll new file mode 100644 index 0000000000000..05f4dc2e8c2cf --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-v8-3.ll @@ -0,0 +1,83 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple \ +; RUN: aarch64-arm-linux-gnu %s -o - | FileCheck %s + +; Check that outlined functions use the dedicated RETAA/RETAB instructions +; to authenticate their return address, where available.
+ +define void @a() #0 { +; CHECK-LABEL: a: // @a +; CHECK: // %bb.0: +; CHECK-NEXT: pacibsp +; CHECK: bl [[OUTLINED_FUNC:OUTLINED_FUNCTION_[0-9]+]] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: retab +; CHECK-NOT: auti[a,b]sp + ret void +} + +define void @b() #0 { +; CHECK-LABEL: b: // @b +; CHECK: // %bb.0: +; CHECK-NEXT: pacibsp +; CHECK: bl OUTLINED_FUNC + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: retab +; CHECK-NOT: auti[a,b]sp + ret void +} + +define void @c() #0 { +; CHECK-LABEL: c: // @c +; CHECK: // %bb.0: +; CHECK-NEXT: pacibsp +; CHECK: bl OUTLINED_FUNC + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 +; CHECK: retab +; CHECK-NOT: auti[a,b]sp + ret void +} + +attributes #0 = { "sign-return-address"="all" + "sign-return-address-key"="b_key" + "target-features"="+v8.3a" + nounwind } + +; CHECK: OUTLINED_FUNC +; CHECK: // %bb.0: +; CHECK-NEXT: .cfi_b_key_frame +; CHECK-NEXT: pacibsp +; CHECK: retab +; CHECK-NOT: auti[a,b]sp diff --git a/llvm/test/CodeGen/AArch64/macro-fusion.ll b/llvm/test/CodeGen/AArch64/macro-fusion.ll index 97bca14df5790..b9a263fe2e23a 100644 --- a/llvm/test/CodeGen/AArch64/macro-fusion.ll +++ b/llvm/test/CodeGen/AArch64/macro-fusion.ll @@ -1,21 +1,18 @@ ; REQUIRES: asserts ; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+fuse-arith-logic -verify-misched -debug-only=machine-scheduler 2>&1 > /dev/null | FileCheck %s -; Verify that, the macro-fusion creates the necessary dependencies between SUs. +; Verify that the macro-fusion creates the necessary dependencies between SUs and +; that at most two SUs are fused.
define signext i32 @test(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) { entry: ; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: %bb.0 entry ; CHECK: Macro fuse: SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]]) ; CHECK: Bind SU([[SU1:[0-9]+]]) - SU([[SU4]]) -; CHECK: Macro fuse: SU([[SU5]]) - SU([[SU6:[0-9]+]]) -; CHECK: Bind SU([[SU0:[0-9]+]]) - SU([[SU5]]) -; CHECK: SU([[SU0]]): %{{[0-9]+}}:gpr32 = COPY $w3 +; CHECK-NOT: Macro fuse: ; CHECK: SU([[SU1]]): %{{[0-9]+}}:gpr32 = COPY $w2 ; CHECK: SU([[SU4]]): %{{[0-9]+}}:gpr32 = nsw ADDWrr ; CHECK: SU([[SU5]]): %{{[0-9]+}}:gpr32 = nsw ADDWrr -; CHECK: SU([[SU6]]): %{{[0-9]+}}:gpr32 = nsw SUBWrr - %add = add nsw i32 %b, %a %add1 = add nsw i32 %add, %c %sub = sub nsw i32 %add1, %d diff --git a/llvm/test/CodeGen/AArch64/neon-mla-mls.ll b/llvm/test/CodeGen/AArch64/neon-mla-mls.ll index 71bb0e70abfaa..a4b9ef8eff575 100644 --- a/llvm/test/CodeGen/AArch64/neon-mla-mls.ll +++ b/llvm/test/CodeGen/AArch64/neon-mla-mls.ll @@ -1,85 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { -;CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-LABEL: mla8xi8: +; CHECK: // %bb.0: +; CHECK-NEXT: mla v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %tmp1 = mul <8 x i8> %A, %B; %tmp2 = add <8 x i8> %C, %tmp1; ret <8 x i8> %tmp2 } define <16 x i8> @mla16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { -;CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK-LABEL: mla16xi8: +; CHECK: // %bb.0: +; CHECK-NEXT: mla v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %tmp1 = mul <16 x i8> %A, %B; %tmp2 = add <16 x i8> %C, %tmp1; ret <16 x i8> %tmp2 } define <4 x i16> @mla4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) { -;CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK-LABEL: mla4xi16: +; CHECK: // %bb.0: +; CHECK-NEXT: mla v2.4h, v0.4h, v1.4h +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %tmp1 = mul <4 x i16> %A, %B; %tmp2 = add <4 x i16> %C, %tmp1; ret <4 x i16> %tmp2 } define <8 x i16> @mla8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) { -;CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK-LABEL: mla8xi16: +; CHECK: // %bb.0: +; CHECK-NEXT: mla v2.8h, v0.8h, v1.8h +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %tmp1 = mul <8 x i16> %A, %B; %tmp2 = add <8 x i16> %C, %tmp1; ret <8 x i16> %tmp2 } define <2 x i32> @mla2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) { -;CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-LABEL: mla2xi32: +; CHECK: // %bb.0: +; CHECK-NEXT: mla v2.2s, v0.2s, v1.2s +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %tmp1 = mul <2 x i32> %A, %B; %tmp2 = add <2 x i32> %C, %tmp1; ret <2 x i32> %tmp2 } define <4 x i32> @mla4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) { -;CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-LABEL: mla4xi32: +; CHECK: // %bb.0: +; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %tmp1 = mul <4 x i32> %A, %B; %tmp2 = add <4 x i32> %C, %tmp1; ret <4 x i32> %tmp2 } define <8 x i8> @mls8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { -;CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-LABEL: mls8xi8: +; CHECK: // %bb.0: +; CHECK-NEXT: mls v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b 
+; CHECK-NEXT: ret %tmp1 = mul <8 x i8> %A, %B; %tmp2 = sub <8 x i8> %C, %tmp1; ret <8 x i8> %tmp2 } define <16 x i8> @mls16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { -;CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK-LABEL: mls16xi8: +; CHECK: // %bb.0: +; CHECK-NEXT: mls v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %tmp1 = mul <16 x i8> %A, %B; %tmp2 = sub <16 x i8> %C, %tmp1; ret <16 x i8> %tmp2 } define <4 x i16> @mls4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) { -;CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK-LABEL: mls4xi16: +; CHECK: // %bb.0: +; CHECK-NEXT: mls v2.4h, v0.4h, v1.4h +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %tmp1 = mul <4 x i16> %A, %B; %tmp2 = sub <4 x i16> %C, %tmp1; ret <4 x i16> %tmp2 } define <8 x i16> @mls8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) { -;CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK-LABEL: mls8xi16: +; CHECK: // %bb.0: +; CHECK-NEXT: mls v2.8h, v0.8h, v1.8h +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %tmp1 = mul <8 x i16> %A, %B; %tmp2 = sub <8 x i16> %C, %tmp1; ret <8 x i16> %tmp2 } define <2 x i32> @mls2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) { -;CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-LABEL: mls2xi32: +; CHECK: // %bb.0: +; CHECK-NEXT: mls v2.2s, v0.2s, v1.2s +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %tmp1 = mul <2 x i32> %A, %B; %tmp2 = sub <2 x i32> %C, %tmp1; ret <2 x i32> %tmp2 } define <4 x i32> @mls4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) { -;CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-LABEL: mls4xi32: +; CHECK: // %bb.0: +; CHECK-NEXT: mls v2.4s, v0.4s, v1.4s +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %tmp1 = mul <4 x i32> %A, %B; %tmp2 = sub <4 x i32> %C, %tmp1; ret <4 x i32> %tmp2 diff --git a/llvm/test/CodeGen/AArch64/neon-vcadd.ll b/llvm/test/CodeGen/AArch64/neon-vcadd.ll new file mode 100644 index 0000000000000..11605267c09b4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/neon-vcadd.ll @@ -0,0 +1,67 @@ +; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+fullfp16 -o - | FileCheck %s + +define <4 x half> @foo16x4_rot(<4 x half> %a, <4 x half> %b) { +entry: +; CHECK-LABEL: foo16x4_rot +; CHECK-DAG: fcadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #90 +; CHECK-DAG: fcadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #270 + %vcadd_rot90_v2.i = tail call <4 x half> @llvm.aarch64.neon.vcadd.rot90.v4f16(<4 x half> %a, <4 x half> %b) + %vcadd_rot270_v2.i = tail call <4 x half> @llvm.aarch64.neon.vcadd.rot270.v4f16(<4 x half> %a, <4 x half> %b) + %add = fadd <4 x half> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i + ret <4 x half> %add +} + +define <2 x float> @foo32x2_rot(<2 x float> %a, <2 x float> %b) { +entry: +; CHECK-LABEL: foo32x2_rot +; CHECK-DAG: fcadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #90 +; CHECK-DAG: fcadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #270 + %vcadd_rot90_v2.i = tail call <2 x float> @llvm.aarch64.neon.vcadd.rot90.v2f32(<2 x float> %a, <2 x float> %b) + %vcadd_rot270_v2.i = tail call <2 x float> @llvm.aarch64.neon.vcadd.rot270.v2f32(<2 x float> %a, <2 x float> %b) + %add = fadd <2 x float> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i + ret <2 x float> %add +} + +define <8 x half> @foo16x8_rot(<8 x half> %a, <8 x half> %b) { +entry: +; CHECK-LABEL: foo16x8_rot +; CHECK-DAG: fcadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #90 +; CHECK-DAG: fcadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #270 + 
%vcaddq_rot90_v2.i = tail call <8 x half> @llvm.aarch64.neon.vcadd.rot90.v8f16(<8 x half> %a, <8 x half> %b) + %vcaddq_rot270_v2.i = tail call <8 x half> @llvm.aarch64.neon.vcadd.rot270.v8f16(<8 x half> %a, <8 x half> %b) + %add = fadd <8 x half> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i + ret <8 x half> %add +} + +define <4 x float> @foo32x4_rot(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: foo32x4_rot +; CHECK-DAG: fcadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #90 +; CHECK-DAG: fcadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #270 + %vcaddq_rot90_v2.i = tail call <4 x float> @llvm.aarch64.neon.vcadd.rot90.v4f32(<4 x float> %a, <4 x float> %b) + %vcaddq_rot270_v2.i = tail call <4 x float> @llvm.aarch64.neon.vcadd.rot270.v4f32(<4 x float> %a, <4 x float> %b) + %add = fadd <4 x float> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i + ret <4 x float> %add +} + +define <2 x double> @foo64x2_rot(<2 x double> %a, <2 x double> %b) { +entry: +; CHECK-LABEL: foo64x2_rot +; CHECK-DAG: fcadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #90 +; CHECK-DAG: fcadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #270 + %vcaddq_rot90_v2.i = tail call <2 x double> @llvm.aarch64.neon.vcadd.rot90.v2f64(<2 x double> %a, <2 x double> %b) + %vcaddq_rot270_v2.i = tail call <2 x double> @llvm.aarch64.neon.vcadd.rot270.v2f64(<2 x double> %a, <2 x double> %b) + %add = fadd <2 x double> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i + ret <2 x double> %add +} + +declare <4 x half> @llvm.aarch64.neon.vcadd.rot90.v4f16(<4 x half>, <4 x half>) +declare <4 x half> @llvm.aarch64.neon.vcadd.rot270.v4f16(<4 x half>, <4 x half>) +declare <2 x float> @llvm.aarch64.neon.vcadd.rot90.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.aarch64.neon.vcadd.rot270.v2f32(<2 x float>, <2 x float>) +declare <8 x half> @llvm.aarch64.neon.vcadd.rot90.v8f16(<8 x half>, <8 x half>) +declare <8 x half> @llvm.aarch64.neon.vcadd.rot270.v8f16(<8 x half>, <8 x half>) +declare <4 x float> @llvm.aarch64.neon.vcadd.rot90.v4f32(<4 x float>, <4 x float>) +declare <4 x float> @llvm.aarch64.neon.vcadd.rot270.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.aarch64.neon.vcadd.rot90.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.aarch64.neon.vcadd.rot270.v2f64(<2 x double>, <2 x double>) diff --git a/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll b/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll new file mode 100644 index 0000000000000..67b54e46e36cb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s --mtriple aarch64 -verify-machineinstrs -o - | FileCheck %s + +define dso_local void @jsimd_idct_ifast_neon_intrinsic(i8* nocapture readonly %dct_table, i16* nocapture readonly %coef_block, i8** nocapture readonly %output_buf, i32 %output_col) local_unnamed_addr #0 { +; CHECK-LABEL: jsimd_idct_ifast_neon_intrinsic: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr q0, [x1, #32] +; CHECK-NEXT: ldr q1, [x1, #96] +; CHECK-NEXT: ldr q2, [x0, #32] +; CHECK-NEXT: ldr q3, [x0, #96] +; CHECK-NEXT: ldr x8, [x2, #48] +; CHECK-NEXT: mov w9, w3 +; CHECK-NEXT: mul v0.8h, v2.8h, v0.8h +; CHECK-NEXT: mul v1.8h, v3.8h, v1.8h +; CHECK-NEXT: add v2.8h, v0.8h, v1.8h +; CHECK-NEXT: str q2, [x8, x9] +; CHECK-NEXT: ldr x8, [x2, #56] +; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h +; CHECK-NEXT: str q0, [x8, x9] +; CHECK-NEXT: ret +entry: + %add.ptr5 = getelementptr inbounds i16, i16* %coef_block, i64 16 + %0 = 
bitcast i16* %add.ptr5 to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 16 + + %add.ptr17 = getelementptr inbounds i16, i16* %coef_block, i64 48 + %2 = bitcast i16* %add.ptr17 to <8 x i16>* + %3 = load <8 x i16>, <8 x i16>* %2, align 16 + + %add.ptr29 = getelementptr inbounds i8, i8* %dct_table, i64 32 + %4 = bitcast i8* %add.ptr29 to <8 x i16>* + %5 = load <8 x i16>, <8 x i16>* %4, align 16 + + %add.ptr41 = getelementptr inbounds i8, i8* %dct_table, i64 96 + %6 = bitcast i8* %add.ptr41 to <8 x i16>* + %7 = load <8 x i16>, <8 x i16>* %6, align 16 + + %mul.i966 = mul <8 x i16> %5, %1 + %mul.i964 = mul <8 x i16> %7, %3 + + %add.i961 = add <8 x i16> %mul.i966, %mul.i964 + %sub.i960 = sub <8 x i16> %mul.i966, %mul.i964 + + %idx.ext = zext i32 %output_col to i64 + + %arrayidx404 = getelementptr inbounds i8*, i8** %output_buf, i64 6 + %8 = load i8*, i8** %arrayidx404, align 8 + %add.ptr406 = getelementptr inbounds i8, i8* %8, i64 %idx.ext + %9 = bitcast i8* %add.ptr406 to <8 x i16>* + store <8 x i16> %add.i961, <8 x i16>* %9, align 8 + + %arrayidx408 = getelementptr inbounds i8*, i8** %output_buf, i64 7 + %10 = load i8*, i8** %arrayidx408, align 8 + %add.ptr410 = getelementptr inbounds i8, i8* %10, i64 %idx.ext + %11 = bitcast i8* %add.ptr410 to <8 x i16>* + store <8 x i16> %sub.i960, <8 x i16>* %11, align 8 + + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll index 5e12981fd67e3..ad6dc9c2d23a6 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-log-pred.ll @@ -125,6 +125,46 @@ define @xor_pred_i64( %pg, %out } +define @bic_pred_i8( %pg, %a, %b) { +; CHECK-LABEL: bic_pred_i8: +; CHECK: bic z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.bic.nxv2i8( %pg, + %a, + %b) + ret %out +} + +define @bic_pred_i16( %pg, %a, %b) { +; CHECK-LABEL: bic_pred_i16: +; CHECK: bic z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.bic.nxv2i16( %pg, + %a, + %b) + ret %out +} + + +define @bic_pred_i32( %pg, %a, %b) { +; CHECK-LABEL: bic_pred_i32: +; CHECK: bic z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.bic.nxv2i32( %pg, + %a, + %b) + ret %out +} + +define @bic_pred_i64( %pg, %a, %b) { +; CHECK-LABEL: bic_pred_i64: +; CHECK: bic z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.bic.nxv2i64( %pg, + %a, + %b) + ret %out +} declare @llvm.aarch64.sve.and.nxv2i8(,,) declare @llvm.aarch64.sve.and.nxv2i16(,,) @@ -138,3 +178,7 @@ declare @llvm.aarch64.sve.xor.nxv2i8(, @llvm.aarch64.sve.xor.nxv2i16(,,) declare @llvm.aarch64.sve.xor.nxv2i32(,,) declare @llvm.aarch64.sve.xor.nxv2i64(,,) +declare @llvm.aarch64.sve.bic.nxv2i8(,,) +declare @llvm.aarch64.sve.bic.nxv2i16(,,) +declare @llvm.aarch64.sve.bic.nxv2i32(,,) +declare @llvm.aarch64.sve.bic.nxv2i64(,,) diff --git a/llvm/test/CodeGen/AArch64/sve-int-log.ll b/llvm/test/CodeGen/AArch64/sve-int-log.ll index cdd562823bf7f..3c45d0511f7a8 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-log.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-log.ll @@ -99,8 +99,8 @@ define @bic_d( %a, %b) { ; CHECK-LABEL: bic_d ; CHECK: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.bic.nxv2i64( %a, - %b) + %res = call @llvm.aarch64.sve.bic.base.nxv2i64( %a, + %b) ret %res } @@ -108,8 +108,8 @@ define @bic_s( %a, %b) { ; CHECK-LABEL: bic_s ; CHECK: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.bic.nxv4i32( %a, - %b) + %res = call 
@llvm.aarch64.sve.bic.base.nxv4i32( %a, + %b) ret %res } @@ -117,8 +117,8 @@ define @bic_h( %a, %b) { ; CHECK-LABEL: bic_h ; CHECK: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.bic.nxv8i16( %a, - %b) + %res = call @llvm.aarch64.sve.bic.base.nxv8i16( %a, + %b) ret %res } @@ -127,12 +127,12 @@ define @bic_b( %a, %b) { ; CHECK-LABEL: bic_b ; CHECK: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.bic.nxv16i8( %a, - %b) + %res = call @llvm.aarch64.sve.bic.base.nxv16i8( %a, + %b) ret %res } -declare @llvm.aarch64.sve.bic.nxv2i64(, ) -declare @llvm.aarch64.sve.bic.nxv4i32(, ) -declare @llvm.aarch64.sve.bic.nxv8i16(, ) -declare @llvm.aarch64.sve.bic.nxv16i8(, ) +declare @llvm.aarch64.sve.bic.base.nxv2i64(, ) +declare @llvm.aarch64.sve.bic.base.nxv4i32(, ) +declare @llvm.aarch64.sve.bic.base.nxv8i16(, ) +declare @llvm.aarch64.sve.bic.base.nxv16i8(, ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll new file mode 100644 index 0000000000000..a3fd4faf196f0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll @@ -0,0 +1,99 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; CNTB +; + +define i64 @cntb() { +; CHECK-LABEL: cntb: +; CHECK: cntb x0, vl2 +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.cntb(i32 2) + ret i64 %out +} + +; +; CNTH +; + +define i64 @cnth() { +; CHECK-LABEL: cnth: +; CHECK: cnth x0, vl3 +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.cnth(i32 3) + ret i64 %out +} + +; +; CNTW +; + +define i64 @cntw() { +; CHECK-LABEL: cntw: +; CHECK: cntw x0, vl4 +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.cntw(i32 4) + ret i64 %out +} + +; +; CNTD +; + +define i64 @cntd() { +; CHECK-LABEL: cntd: +; CHECK: cntd x0, vl5 +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.cntd(i32 5) + ret i64 %out +} + +; +; CNTP +; + +define i64 @cntp_b8( %pg, %a) { +; CHECK-LABEL: cntp_b8: +; CHECK: cntp x0, p0, p1.b +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.cntp.nxv16i1( %pg, + %a) + ret i64 %out +} + +define i64 @cntp_b16( %pg, %a) { +; CHECK-LABEL: cntp_b16: +; CHECK: cntp x0, p0, p1.h +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.cntp.nxv8i1( %pg, + %a) + ret i64 %out +} + +define i64 @cntp_b32( %pg, %a) { +; CHECK-LABEL: cntp_b32: +; CHECK: cntp x0, p0, p1.s +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.cntp.nxv4i1( %pg, + %a) + ret i64 %out +} + +define i64 @cntp_b64( %pg, %a) { +; CHECK-LABEL: cntp_b64: +; CHECK: cntp x0, p0, p1.d +; CHECK-NEXT: ret + %out = call i64 @llvm.aarch64.sve.cntp.nxv2i1( %pg, + %a) + ret i64 %out +} + +declare i64 @llvm.aarch64.sve.cntb(i32 %pattern) +declare i64 @llvm.aarch64.sve.cnth(i32 %pattern) +declare i64 @llvm.aarch64.sve.cntw(i32 %pattern) +declare i64 @llvm.aarch64.sve.cntd(i32 %pattern) + +declare i64 @llvm.aarch64.sve.cntp.nxv16i1(, ) +declare i64 @llvm.aarch64.sve.cntp.nxv8i1(, ) +declare i64 @llvm.aarch64.sve.cntp.nxv4i1(, ) +declare i64 @llvm.aarch64.sve.cntp.nxv2i1(, ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll new file mode 100644 index 0000000000000..e777a2f3b8b04 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll @@ -0,0 +1,400 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; FCVT +; + +define @fcvt_f16_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f16_f32: +; CHECK: fcvt z0.h, 
p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f16f32( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f16_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f16_f64: +; CHECK: fcvt z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f16f64( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f32_f16: +; CHECK: fcvt z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f32f16( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f32_f64: +; CHECK: fcvt z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f32f64( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f64_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f64_f16: +; CHECK: fcvt z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f64f16( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f64_f32: +; CHECK: fcvt z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f64f32( %a, + %pg, + %b) + ret %out +} + +; +; FCVTZS +; + +define @fcvtzs_i16_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i16_f16: +; CHECK: fcvtzs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i32_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i32_f32: +; CHECK: fcvtzs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i64_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i64_f64: +; CHECK: fcvtzs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i32_f16: +; CHECK: fcvtzs z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i32f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i32_f64: +; CHECK: fcvtzs z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i32f64( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i64_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i64_f16: +; CHECK: fcvtzs z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i64f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i64_f32: +; CHECK: fcvtzs z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i64f32( %a, + %pg, + %b) + ret %out +} + +; +; FCVTZU +; + +define @fcvtzu_i16_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i16_f16: +; CHECK: fcvtzu z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i32_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i32_f32: +; CHECK: fcvtzu z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i64_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i64_f64: +; CHECK: fcvtzu z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i32_f16: +; CHECK: fcvtzu z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i32f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i32_f64: +; CHECK: fcvtzu z0.s, p0/m, z1.d +; 
CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i32f64( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i64_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i64_f16: +; CHECK: fcvtzu z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i64f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i64_f32: +; CHECK: fcvtzu z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i64f32( %a, + %pg, + %b) + ret %out +} + +; +; SCVTF +; + +define @scvtf_f16_i16( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f16_i16: +; CHECK: scvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f32_i32( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f32_i32: +; CHECK: scvtf z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f64_i64( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f64_i64: +; CHECK: scvtf z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f16_i32( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f16_i32: +; CHECK: scvtf z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.f16i32( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f16_i64( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f16_i64: +; CHECK: scvtf z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.f16i64( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f32_i64( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f32_i64: +; CHECK: scvtf z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.f32i64( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f64_i32( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f64_i32: +; CHECK: scvtf z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.f64i32( %a, + %pg, + %b) + ret %out +} + +; +; UCVTF +; + +define @ucvtf_f16_i16( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f16_i16: +; CHECK: ucvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f32_i32( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f32_i32: +; CHECK: ucvtf z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f64_i64( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f64_i64: +; CHECK: ucvtf z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f16_i32( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f16_i32: +; CHECK: ucvtf z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f16i32( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f16_i64( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f16_i64: +; CHECK: ucvtf z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f16i64( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f32_i64( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f32_i64: +; CHECK: ucvtf z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f32i64( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f64_i32( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f64_i32: +; CHECK: ucvtf z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f64i32( %a, + %pg, + %b) + ret %out +} + +declare @llvm.aarch64.sve.fcvt.f16f32(, , ) +declare @llvm.aarch64.sve.fcvt.f16f64(, , ) +declare @llvm.aarch64.sve.fcvt.f32f16(, , ) +declare 
@llvm.aarch64.sve.fcvt.f32f64(, , ) +declare @llvm.aarch64.sve.fcvt.f64f16(, , ) +declare @llvm.aarch64.sve.fcvt.f64f32(, , ) + +declare @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(, , ) +declare @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(, , ) +declare @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(, , ) +declare @llvm.aarch64.sve.fcvtzs.i32f16(, , ) +declare @llvm.aarch64.sve.fcvtzs.i32f64(, , ) +declare @llvm.aarch64.sve.fcvtzs.i64f16(, , ) +declare @llvm.aarch64.sve.fcvtzs.i64f32(, , ) + +declare @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(, , ) +declare @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(, , ) +declare @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(, , ) +declare @llvm.aarch64.sve.fcvtzu.i32f16(, , ) +declare @llvm.aarch64.sve.fcvtzu.i32f64(, , ) +declare @llvm.aarch64.sve.fcvtzu.i64f16(, , ) +declare @llvm.aarch64.sve.fcvtzu.i64f32(, , ) + +declare @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(, , ) +declare @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(, , ) +declare @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(, , ) +declare @llvm.aarch64.sve.scvtf.f16i32(, , ) +declare @llvm.aarch64.sve.scvtf.f16i64(, , ) +declare @llvm.aarch64.sve.scvtf.f32i64(, , ) +declare @llvm.aarch64.sve.scvtf.f64i32(, , ) + +declare @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(, , ) +declare @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(, , ) +declare @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(, , ) +declare @llvm.aarch64.sve.ucvtf.f16i32(, , ) +declare @llvm.aarch64.sve.ucvtf.f16i64(, , ) +declare @llvm.aarch64.sve.ucvtf.f32i64(, , ) +declare @llvm.aarch64.sve.ucvtf.f64i32(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll new file mode 100644 index 0000000000000..74241389d3ac2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll @@ -0,0 +1,198 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LD1H, LD1W, LD1D: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw) +; extended to 64 bits +; e.g. 
ld1h z0.d, p0/z, [x0, z0.d, uxtw #1] +; + +; LD1H +define @gld1h_s_uxtw_index( %pg, i16* %base, %b) { +; CHECK-LABEL: gld1h_s_uxtw_index: +; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16.nxv4i32( %pg, + i16* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1h_s_sxtw_index( %pg, i16* %base, %b) { +; CHECK-LABEL: gld1h_s_sxtw_index: +; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16.nxv4i32( %pg, + i16* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1h_d_uxtw_index( %pg, i16* %base, %b) { +; CHECK-LABEL: gld1h_d_uxtw_index: +; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16.nxv2i64( %pg, + i16* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1h_d_sxtw_index( %pg, i16* %base, %b) { +; CHECK-LABEL: gld1h_d_sxtw_index: +; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16.nxv2i64( %pg, + i16* %base, + %b) + %res = zext %load to + ret %res +} + +; LD1W +define @gld1w_s_uxtw_index( %pg, i32* %base, %b) { +; CHECK-LABEL: gld1w_s_uxtw_index: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32.nxv4i32( %pg, + i32* %base, + %b) + ret %load +} + +define @gld1w_s_sxtw_index( %pg, i32* %base, %b) { +; CHECK-LABEL: gld1w_s_sxtw_index: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32.nxv4i32( %pg, + i32* %base, + %b) + ret %load +} + +define @gld1w_d_uxtw_index( %pg, i32* %base, %b) { +; CHECK-LABEL: gld1w_d_uxtw_index: +; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2] +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32.nxv2i64( %pg, + i32* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1w_d_sxtw_index( %pg, i32* %base, %b) { +; CHECK-LABEL: gld1w_d_sxtw_index: +; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2] +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32.nxv2i64( %pg, + i32* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1w_s_uxtw_index_float( %pg, float* %base, %b) { +; CHECK-LABEL: gld1w_s_uxtw_index_float: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32.nxv4i32( %pg, + float* %base, + %b) + ret %load +} + +define @gld1w_s_sxtw_index_float( %pg, float* %base, %b) { +; CHECK-LABEL: gld1w_s_sxtw_index_float: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32.nxv4i32( %pg, + float* %base, + %b) + ret %load +} + +; LD1D +define @gld1d_s_uxtw_index( %pg, i64* %base, 
%b) { +; CHECK-LABEL: gld1d_s_uxtw_index: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64.nxv2i64( %pg, + i64* %base, + %b) + ret %load +} + +define @gld1d_sxtw_index( %pg, i64* %base, %b) { +; CHECK-LABEL: gld1d_sxtw_index: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64.nxv2i64( %pg, + i64* %base, + %b) + ret %load +} + +define @gld1d_uxtw_index_double( %pg, double* %base, %b) { +; CHECK-LABEL: gld1d_uxtw_index_double: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64.nxv2i64( %pg, + double* %base, + %b) + ret %load +} + +define @gld1d_sxtw_index_double( %pg, double* %base, %b) { +; CHECK-LABEL: gld1d_sxtw_index_double: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64.nxv2i64( %pg, + double* %base, + %b) + ret %load +} + +; LD1H +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16.nxv4i32(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16.nxv4i32(, i16*, ) + +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16.nxv2i64(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16.nxv2i64(, i16*, ) + +; LD1W +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32.nxv4i32(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32.nxv4i32(, i32*, ) + +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32.nxv2i64(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32.nxv2i64(, i32*, ) + +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32.nxv4i32(, float*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32.nxv4i32(, float*, ) + +; LD1D +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64.nxv2i64(, i64*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64.nxv2i64(, i64*, ) + +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64.nxv2i64(, double*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64.nxv2i64(, double*, ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll new file mode 100644 index 0000000000000..a4d26f29a9db3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll @@ -0,0 +1,259 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LD1B, LD1W, LD1H, LD1D: base + 32-bit unscaled offset, sign (sxtw) or zero +; (uxtw) extended to 64 bits. +; e.g. 
ld1h { z0.d }, p0/z, [x0, z0.d, uxtw] +; + +; LD1B +define @gld1b_s_uxtw( %pg, i8* %base, %b) { +; CHECK-LABEL: gld1b_s_uxtw: +; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8.nxv4i32( %pg, + i8* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1b_s_sxtw( %pg, i8* %base, %b) { +; CHECK-LABEL: gld1b_s_sxtw: +; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, sxtw] +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8.nxv4i32( %pg, + i8* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1b_d_uxtw( %pg, i8* %base, %b) { +; CHECK-LABEL: gld1b_d_uxtw: +; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, uxtw] +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8.nxv2i64( %pg, + i8* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1b_d_sxtw( %pg, i8* %base, %b) { +; CHECK-LABEL: gld1b_d_sxtw: +; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, sxtw] +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8.nxv2i64( %pg, + i8* %base, + %b) + %res = zext %load to + ret %res +} + +; LD1H +define @gld1h_s_uxtw( %pg, i16* %base, %b) { +; CHECK-LABEL: gld1h_s_uxtw: +; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16.nxv4i32( %pg, + i16* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1h_s_sxtw( %pg, i16* %base, %b) { +; CHECK-LABEL: gld1h_s_sxtw: +; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16.nxv4i32( %pg, + i16* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1h_d_uxtw( %pg, i16* %base, %b) { +; CHECK-LABEL: gld1h_d_uxtw: +; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16.nxv2i64( %pg, + i16* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1h_d_sxtw( %pg, i16* %base, %b) { +; CHECK-LABEL: gld1h_d_sxtw: +; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16.nxv2i64( %pg, + i16* %base, + %b) + %res = zext %load to + ret %res +} + +; LD1W +define @gld1w_s_uxtw( %pg, i32* %base, %b) { +; CHECK-LABEL: gld1w_s_uxtw: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32.nxv4i32( %pg, + i32* %base, + %b) + ret %load +} + +define @gld1w_s_sxtw( %pg, i32* %base, %b) { +; CHECK-LABEL: gld1w_s_sxtw: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32.nxv4i32( %pg, + i32* %base, + %b) + ret %load +} + +define @gld1w_d_uxtw( %pg, i32* %base, %b) { +; CHECK-LABEL: 
gld1w_d_uxtw: +; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw] +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32.nxv2i64( %pg, + i32* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1w_d_sxtw( %pg, i32* %base, %b) { +; CHECK-LABEL: gld1w_d_sxtw: +; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw] +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32.nxv2i64( %pg, + i32* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1w_s_uxtw_float( %pg, float* %base, %b) { +; CHECK-LABEL: gld1w_s_uxtw_float: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32.nxv4i32( %pg, + float* %base, + %b) + ret %load +} + +define @gld1w_s_sxtw_float( %pg, float* %base, %b) { +; CHECK-LABEL: gld1w_s_sxtw_float: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32.nxv4i32( %pg, + float* %base, + %b) + ret %load +} + +; LD1D +define @gld1d_d_uxtw( %pg, i64* %base, %b) { +; CHECK-LABEL: gld1d_d_uxtw: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64.nxv2i64( %pg, + i64* %base, + %b) + ret %load +} + +define @gld1d_d_sxtw( %pg, i64* %base, %b) { +; CHECK-LABEL: gld1d_d_sxtw: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64.nxv2i64( %pg, + i64* %base, + %b) + ret %load +} + +define @gld1d_d_uxtw_double( %pg, double* %base, %b) { +; CHECK-LABEL: gld1d_d_uxtw_double: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64.nxv2i64( %pg, + double* %base, + %b) + ret %load +} + +define @gld1d_d_sxtw_double( %pg, double* %base, %b) { +; CHECK-LABEL: gld1d_d_sxtw_double: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64.nxv2i64( %pg, + double* %base, + %b) + ret %load +} + +; LD1B +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8.nxv4i32(, i8*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8.nxv2i64(, i8*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8.nxv4i32(, i8*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8.nxv2i64(, i8*, ) + +; LD1H +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16.nxv4i32(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16.nxv2i64(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16.nxv4i32(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16.nxv2i64(, i16*, ) + +; LD1W +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32.nxv4i32(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32.nxv2i64(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32.nxv4i32(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32.nxv2i64(, i32*, ) + +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32.nxv4i32(, float*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32.nxv4i32(, float*, ) + +; LD1D +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64.nxv2i64(, i64*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64.nxv2i64(, i64*, ) + +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64.nxv2i64(, double*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64.nxv2i64(, double*, ) diff --git 
a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll new file mode 100644 index 0000000000000..274eaad0eef1d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll @@ -0,0 +1,59 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LD1H, LD1W, LD1D: base + 64-bit scaled offset +; e.g. ld1h z0.d, p0/z, [x0, z0.d, lsl #1] +; + +define @gld1h_index( %pg, i16* %base, %b) { +; CHECK-LABEL: gld1h_index +; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.index.nxv2i16( %pg, + i16* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1w_index( %pg, i32* %base, %b) { +; CHECK-LABEL: gld1w_index +; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2] +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.index.nxv2i32( %pg, + i32* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1d_index( %pg, i64* %base, %b) { +; CHECK-LABEL: gld1d_index +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.index.nxv2i64( %pg, + i64* %base, + %b) + ret %load +} + +define @gld1d_index_double( %pg, double* %base, %b) { +; CHECK-LABEL: gld1d_index_double +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.index.nxv2f64( %pg, + double* %base, + %b) + ret %load +} + +declare @llvm.aarch64.sve.ld1.gather.index.nxv2i16(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.index.nxv2i32(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.index.nxv2i64(, i64*, ) +declare @llvm.aarch64.sve.ld1.gather.index.nxv2f64(, double*, ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll new file mode 100644 index 0000000000000..9a8df453b336f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-unscaled-offset.ll @@ -0,0 +1,74 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LD1B, LD1W, LD1H, LD1D: base + 64-bit unscaled offset +; e.g. 
ld1h { z0.d }, p0/z, [x0, z0.d] +; + +define @gld1b_d( %pg, i8* %base, %b) { +; CHECK-LABEL: gld1b_d: +; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.nxv2i8( %pg, + i8* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1h_d( %pg, i16* %base, %b) { +; CHECK-LABEL: gld1h_d: +; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.nxv2i16( %pg, + i16* %base, + %b) + %res = zext %load to + ret %res +} + +define @gld1w_d( %pg, i32* %base, %offsets) { +; CHECK-LABEL: gld1w_d: +; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.nxv2i32( %pg, + i32* %base, + %offsets) + %res = zext %load to + ret %res +} + +define @gld1d_d( %pg, i64* %base, %b) { +; CHECK-LABEL: gld1d_d: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.nxv2i64( %pg, + i64* %base, + %b) + ret %load +} + +define @gld1d_d_double( %pg, double* %base, %b) { +; CHECK-LABEL: gld1d_d_double: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.nxv2f64( %pg, + double* %base, + %b) + ret %load +} + +declare @llvm.aarch64.sve.ld1.gather.nxv2i8(, i8*, ) +declare @llvm.aarch64.sve.ld1.gather.nxv2i16(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.nxv2i32(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.nxv2i64(, i64*, ) +declare @llvm.aarch64.sve.ld1.gather.nxv2f64(, double*, ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll new file mode 100644 index 0000000000000..42d9f86302456 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll @@ -0,0 +1,139 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LD1B, LD1W, LD1H, LD1D: vector + immediate (index) +; e.g. 
ld1h { z0.s }, p0/z, [z0.s, #16] +; + +; LD1B +define @gld1b_s_imm( %pg, %base) { +; CHECK-LABEL: gld1b_s_imm: +; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +define @gld1b_d_imm( %pg, %base) { +; CHECK-LABEL: gld1b_d_imm: +; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +; LD1H +define @gld1h_s_imm( %pg, %base) { +; CHECK-LABEL: gld1h_s_imm: +; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +define @gld1h_d_imm( %pg, %base) { +; CHECK-LABEL: gld1h_d_imm: +; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +; LD1W +define @gld1w_s_imm( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm: +; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32( %pg, + %base, + i64 16) + ret %load +} + +define @gld1w_d_imm( %pg, %base) { +; CHECK-LABEL: gld1w_d_imm: +; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +define @gld1w_s_imm_float( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm_float: +; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32( %pg, + %base, + i64 16) + ret %load +} + +; LD1D +define @gld1d_d_imm( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm: +; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64( %pg, + %base, + i64 16) + ret %load +} + +define @gld1d_d_imm_double( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_double: +; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64( %pg, + %base, + i64 16) + ret %load +} + +; LD1B +declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(, , i64) + +; LD1H +declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64(, , i64) + +; LD1W +declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32(, , i64) + +; LD1D +declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64(, , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll new file mode 100644 index 
0000000000000..69adf7fc68380 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll @@ -0,0 +1,166 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; RBIT +; + +define @rbit_i8( %a, %pg, %b) { +; CHECK-LABEL: rbit_i8: +; CHECK: rbit z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.rbit.nxv16i8( %a, + %pg, + %b) + ret %out +} + +define @rbit_i16( %a, %pg, %b) { +; CHECK-LABEL: rbit_i16: +; CHECK: rbit z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.rbit.nxv8i16( %a, + %pg, + %b) + ret %out +} + +define @rbit_i32( %a, %pg, %b) { +; CHECK-LABEL: rbit_i32: +; CHECK: rbit z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.rbit.nxv4i32( %a, + %pg, + %b) + ret %out +} + +define @rbit_i64( %a, %pg, %b) { +; CHECK-LABEL: rbit_i64: +; CHECK: rbit z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.rbit.nxv2i64( %a, + %pg, + %b) + ret %out +} + +; +; REVB +; + +define @revb_i16( %a, %pg, %b) { +; CHECK-LABEL: revb_i16: +; CHECK: revb z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.revb.nxv8i16( %a, + %pg, + %b) + ret %out +} + +define @revb_i32( %a, %pg, %b) { +; CHECK-LABEL: revb_i32: +; CHECK: revb z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.revb.nxv4i32( %a, + %pg, + %b) + ret %out +} + +define @revb_i64( %a, %pg, %b) { +; CHECK-LABEL: revb_i64: +; CHECK: revb z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.revb.nxv2i64( %a, + %pg, + %b) + ret %out +} + +; +; REVB (bswap) +; + +define @revb_i16_bswap( %a) { +; CHECK-LABEL: revb_i16_bswap: +; CHECK: ptrue [[PG:p[0-9]+]].h +; CHECK-NEXT: revb z0.h, [[PG]]/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.bswap.nxv8i16( %a) + ret %res +} + +define @revb_i32_bswap( %a) { +; CHECK-LABEL: revb_i32_bswap: +; CHECK: ptrue [[PG:p[0-9]+]].s +; CHECK-NEXT: revb z0.s, [[PG]]/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.bswap.nxv4i32( %a) + ret %res +} + +define @revb_i64_bswap( %a) { +; CHECK-LABEL: revb_i64_bswap: +; CHECK: ptrue [[PG:p[0-9]+]].d +; CHECK-NEXT: revb z0.d, [[PG]]/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.bswap.nxv2i64( %a) + ret %res +} + +; +; REVH +; + +define @revh_i32( %a, %pg, %b) { +; CHECK-LABEL: revh_i32: +; CHECK: revh z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.revh.nxv4i32( %a, + %pg, + %b) + ret %out +} + +define @revh_i64( %a, %pg, %b) { +; CHECK-LABEL: revh_i64: +; CHECK: revh z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.revh.nxv2i64( %a, + %pg, + %b) + ret %out +} + +; +; REVW +; + +define @revw_i64( %a, %pg, %b) { +; CHECK-LABEL: revw_i64: +; CHECK: revw z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.revw.nxv2i64( %a, + %pg, + %b) + ret %out +} + +declare @llvm.aarch64.sve.rbit.nxv16i8(, , ) +declare @llvm.aarch64.sve.rbit.nxv8i16(, , ) +declare @llvm.aarch64.sve.rbit.nxv4i32(, , ) +declare @llvm.aarch64.sve.rbit.nxv2i64(, , ) + +declare @llvm.aarch64.sve.revb.nxv8i16(, , ) +declare @llvm.aarch64.sve.revb.nxv4i32(, , ) +declare @llvm.aarch64.sve.revb.nxv2i64(, , ) + +declare @llvm.bswap.nxv8i16() +declare @llvm.bswap.nxv4i32() +declare @llvm.bswap.nxv2i64() + +declare @llvm.aarch64.sve.revh.nxv4i32(, , ) +declare @llvm.aarch64.sve.revh.nxv2i64(, , ) + +declare @llvm.aarch64.sve.revw.nxv2i64(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll new file mode 100644 index 0000000000000..b1b3dc61560b4 --- 
/dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll @@ -0,0 +1,367 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; ASR +; + +define @asr_i8( %pg, %a, %b) { +; CHECK-LABEL: asr_i8: +; CHECK: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @asr_i16( %pg, %a, %b) { +; CHECK-LABEL: asr_i16: +; CHECK: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @asr_i32( %pg, %a, %b) { +; CHECK-LABEL: asr_i32: +; CHECK: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @asr_i64( %pg, %a, %b) { +; CHECK-LABEL: asr_i64: +; CHECK: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv2i64( %pg, + %a, + %b) + ret %out +} + +define @asr_wide_i8( %pg, %a, %b) { +; CHECK-LABEL: asr_wide_i8: +; CHECK: asr z0.b, p0/m, z0.b, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.wide.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @asr_wide_i16( %pg, %a, %b) { +; CHECK-LABEL: asr_wide_i16: +; CHECK: asr z0.h, p0/m, z0.h, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.wide.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @asr_wide_i32( %pg, %a, %b) { +; CHECK-LABEL: asr_wide_i32: +; CHECK: asr z0.s, p0/m, z0.s, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.wide.nxv4i32( %pg, + %a, + %b) + ret %out +} + +; +; ASRD +; + +define @asrd_i8( %pg, %a) { +; CHECK-LABEL: asrd_i8: +; CHECK: asrd z0.b, p0/m, z0.b, #1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asrd.nxv16i8( %pg, + %a, + i32 1) + ret %out +} + +define @asrd_i16( %pg, %a) { +; CHECK-LABEL: asrd_i16: +; CHECK: asrd z0.h, p0/m, z0.h, #2 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asrd.nxv8i16( %pg, + %a, + i32 2) + ret %out +} + +define @asrd_i32( %pg, %a) { +; CHECK-LABEL: asrd_i32: +; CHECK: asrd z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asrd.nxv4i32( %pg, + %a, + i32 31) + ret %out +} + +define @asrd_i64( %pg, %a) { +; CHECK-LABEL: asrd_i64: +; CHECK: asrd z0.d, p0/m, z0.d, #64 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asrd.nxv2i64( %pg, + %a, + i32 64) + ret %out +} + +; +; INSR +; + +define @insr_i8( %a, i8 %b) { +; CHECK-LABEL: insr_i8: +; CHECK: insr z0.b, w0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.insr.nxv16i8( %a, i8 %b) + ret %out +} + +define @insr_i16( %a, i16 %b) { +; CHECK-LABEL: insr_i16: +; CHECK: insr z0.h, w0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.insr.nxv8i16( %a, i16 %b) + ret %out +} + +define @insr_i32( %a, i32 %b) { +; CHECK-LABEL: insr_i32: +; CHECK: insr z0.s, w0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.insr.nxv4i32( %a, i32 %b) + ret %out +} + +define @insr_i64( %a, i64 %b) { +; CHECK-LABEL: insr_i64: +; CHECK: insr z0.d, x0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.insr.nxv2i64( %a, i64 %b) + ret %out +} + +define @insr_f16( %a, half %b) { +; CHECK-LABEL: insr_f16: +; CHECK: insr z0.h, h1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.insr.nxv8f16( %a, half %b) + ret %out +} + +define @insr_f32( %a, float %b) { +; CHECK-LABEL: insr_f32: +; CHECK: insr z0.s, s1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.insr.nxv4f32( %a, float %b) + ret %out +} + +define @insr_f64( %a, double %b) { +; CHECK-LABEL: insr_f64: +; CHECK: insr z0.d, d1 +; CHECK-NEXT: ret + %out = call 
@llvm.aarch64.sve.insr.nxv2f64( %a, double %b) + ret %out +} + +; +; LSL +; + +define @lsl_i8( %pg, %a, %b) { +; CHECK-LABEL: lsl_i8: +; CHECK: lsl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @lsl_i16( %pg, %a, %b) { +; CHECK-LABEL: lsl_i16: +; CHECK: lsl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @lsl_i32( %pg, %a, %b) { +; CHECK-LABEL: lsl_i32: +; CHECK: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @lsl_i64( %pg, %a, %b) { +; CHECK-LABEL: lsl_i64: +; CHECK: lsl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, + %a, + %b) + ret %out +} + +define @lsl_wide_i8( %pg, %a, %b) { +; CHECK-LABEL: lsl_wide_i8: +; CHECK: lsl z0.b, p0/m, z0.b, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.wide.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @lsl_wide_i16( %pg, %a, %b) { +; CHECK-LABEL: lsl_wide_i16: +; CHECK: lsl z0.h, p0/m, z0.h, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.wide.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @lsl_wide_i32( %pg, %a, %b) { +; CHECK-LABEL: lsl_wide_i32: +; CHECK: lsl z0.s, p0/m, z0.s, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.wide.nxv4i32( %pg, + %a, + %b) + ret %out +} + +; +; LSR +; + +define @lsr_i8( %pg, %a, %b) { +; CHECK-LABEL: lsr_i8: +; CHECK: lsr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @lsr_i16( %pg, %a, %b) { +; CHECK-LABEL: lsr_i16: +; CHECK: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @lsr_i32( %pg, %a, %b) { +; CHECK-LABEL: lsr_i32: +; CHECK: lsr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv4i32( %pg, + %a, + %b) + ret %out +} + +define @lsr_i64( %pg, %a, %b) { +; CHECK-LABEL: lsr_i64: +; CHECK: lsr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv2i64( %pg, + %a, + %b) + ret %out +} + +define @lsr_wide_i8( %pg, %a, %b) { +; CHECK-LABEL: lsr_wide_i8: +; CHECK: lsr z0.b, p0/m, z0.b, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.wide.nxv16i8( %pg, + %a, + %b) + ret %out +} + +define @lsr_wide_i16( %pg, %a, %b) { +; CHECK-LABEL: lsr_wide_i16: +; CHECK: lsr z0.h, p0/m, z0.h, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.wide.nxv8i16( %pg, + %a, + %b) + ret %out +} + +define @lsr_wide_i32( %pg, %a, %b) { +; CHECK-LABEL: lsr_wide_i32: +; CHECK: lsr z0.s, p0/m, z0.s, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.wide.nxv4i32( %pg, + %a, + %b) + ret %out +} + +declare @llvm.aarch64.sve.asr.nxv16i8(, , ) +declare @llvm.aarch64.sve.asr.nxv8i16(, , ) +declare @llvm.aarch64.sve.asr.nxv4i32(, , ) +declare @llvm.aarch64.sve.asr.nxv2i64(, , ) + +declare @llvm.aarch64.sve.asr.wide.nxv16i8(, , ) +declare @llvm.aarch64.sve.asr.wide.nxv8i16(, , ) +declare @llvm.aarch64.sve.asr.wide.nxv4i32(, , ) + +declare @llvm.aarch64.sve.asrd.nxv16i8(, , i32) +declare @llvm.aarch64.sve.asrd.nxv8i16(, , i32) +declare @llvm.aarch64.sve.asrd.nxv4i32(, , i32) +declare @llvm.aarch64.sve.asrd.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.insr.nxv16i8(, i8) +declare @llvm.aarch64.sve.insr.nxv8i16(, i16) +declare @llvm.aarch64.sve.insr.nxv4i32(, i32) +declare 
@llvm.aarch64.sve.insr.nxv2i64(, i64) +declare @llvm.aarch64.sve.insr.nxv8f16(, half) +declare @llvm.aarch64.sve.insr.nxv4f32(, float) +declare @llvm.aarch64.sve.insr.nxv2f64(, double) + +declare @llvm.aarch64.sve.lsl.nxv16i8(, , ) +declare @llvm.aarch64.sve.lsl.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsl.nxv4i32(, , ) +declare @llvm.aarch64.sve.lsl.nxv2i64(, , ) + +declare @llvm.aarch64.sve.lsl.wide.nxv16i8(, , ) +declare @llvm.aarch64.sve.lsl.wide.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsl.wide.nxv4i32(, , ) + +declare @llvm.aarch64.sve.lsr.nxv16i8(, , ) +declare @llvm.aarch64.sve.lsr.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsr.nxv4i32(, , ) +declare @llvm.aarch64.sve.lsr.nxv2i64(, , ) + +declare @llvm.aarch64.sve.lsr.wide.nxv16i8(, , ) +declare @llvm.aarch64.sve.lsr.wide.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsr.wide.nxv4i32(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll new file mode 100644 index 0000000000000..0590c74d2efc9 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll @@ -0,0 +1,309 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; WHILELE +; + +define @whilele_b_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilele_b_ww: +; CHECK: whilele p0.b, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilele_b_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilele_b_xx: +; CHECK: whilele p0.b, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilele_h_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilele_h_ww: +; CHECK: whilele p0.h, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilele.nxv8i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilele_h_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilele_h_xx: +; CHECK: whilele p0.h, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilele.nxv8i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilele_s_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilele_s_ww: +; CHECK: whilele p0.s, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilele.nxv4i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilele_s_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilele_s_xx: +; CHECK: whilele p0.s, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilele.nxv4i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilele_d_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilele_d_ww: +; CHECK: whilele p0.d, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilele.nxv2i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilele_d_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilele_d_xx: +; CHECK: whilele p0.d, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %a, i64 %b) + ret %out +} + +; +; WHILELO +; + +define @whilelo_b_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilelo_b_ww: +; CHECK: whilelo p0.b, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilelo_b_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilelo_b_xx: +; CHECK: whilelo p0.b, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilelo_h_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilelo_h_ww: +; CHECK: whilelo p0.h, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilelo_h_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilelo_h_xx: +; CHECK: whilelo p0.h, 
x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilelo_s_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilelo_s_ww: +; CHECK: whilelo p0.s, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilelo_s_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilelo_s_xx: +; CHECK: whilelo p0.s, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilelo_d_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilelo_d_ww: +; CHECK: whilelo p0.d, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilelo_d_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilelo_d_xx: +; CHECK: whilelo p0.d, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %a, i64 %b) + ret %out +} + +; +; WHILELS +; + +define @whilels_b_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilels_b_ww: +; CHECK: whilels p0.b, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilels_b_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilels_b_xx: +; CHECK: whilels p0.b, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilels_h_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilels_h_ww: +; CHECK: whilels p0.h, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilels.nxv8i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilels_h_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilels_h_xx: +; CHECK: whilels p0.h, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilels.nxv8i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilels_s_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilels_s_ww: +; CHECK: whilels p0.s, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilels.nxv4i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilels_s_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilels_s_xx: +; CHECK: whilels p0.s, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilels.nxv4i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilels_d_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilels_d_ww: +; CHECK: whilels p0.d, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilels.nxv2i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilels_d_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilels_d_xx: +; CHECK: whilels p0.d, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %a, i64 %b) + ret %out +} + +; +; WHILELT +; + +define @whilelt_b_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilelt_b_ww: +; CHECK: whilelt p0.b, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilelt_b_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilelt_b_xx: +; CHECK: whilelt p0.b, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilelt_h_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilelt_h_ww: +; CHECK: whilelt p0.h, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelt.nxv8i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilelt_h_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilelt_h_xx: +; CHECK: whilelt p0.h, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelt.nxv8i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilelt_s_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilelt_s_ww: +; CHECK: whilelt p0.s, w0, w1 +; CHECK-NEXT: ret + %out = call 
@llvm.aarch64.sve.whilelt.nxv4i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilelt_s_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilelt_s_xx: +; CHECK: whilelt p0.s, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64 %a, i64 %b) + ret %out +} + +define @whilelt_d_ww(i32 %a, i32 %b) { +; CHECK-LABEL: whilelt_d_ww: +; CHECK: whilelt p0.d, w0, w1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 %a, i32 %b) + ret %out +} + +define @whilelt_d_xx(i64 %a, i64 %b) { +; CHECK-LABEL: whilelt_d_xx: +; CHECK: whilelt p0.d, x0, x1 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 %a, i64 %b) + ret %out +} + +declare @llvm.aarch64.sve.whilele.nxv16i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilele.nxv16i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilele.nxv8i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilele.nxv8i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilele.nxv4i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilele.nxv2i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilele.nxv2i1.i64(i64, i64) + +declare @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64, i64) + +declare @llvm.aarch64.sve.whilels.nxv16i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilels.nxv16i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilels.nxv8i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilels.nxv8i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilels.nxv4i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilels.nxv4i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilels.nxv2i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilels.nxv2i1.i64(i64, i64) + +declare @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilelt.nxv8i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilelt.nxv8i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilelt.nxv4i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64, i64) +declare @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32, i32) +declare @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64, i64) diff --git a/llvm/test/CodeGen/AArch64/sve-pred-log.ll b/llvm/test/CodeGen/AArch64/sve-pred-log.ll new file mode 100644 index 0000000000000..772e3f43b7c3d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-pred-log.ll @@ -0,0 +1,545 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define @vselect_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: vselect_16: +; CHECK: sel p0.b, p0, p1.b, p2.b +; CHECK-NEXT: ret + %res = select %Pg, %Pn, %Pd + ret %res; +} + +define @vselect_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: vselect_8: +; CHECK: sel p0.b, p0, p1.b, p2.b +; CHECK-NEXT: ret + %res = select %Pg, %Pn, %Pd + ret %res; +} + +define @vselect_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: vselect_4: +; CHECK: sel p0.b, p0, p1.b, p2.b +; CHECK-NEXT: ret + %res = select %Pg, %Pn, %Pd + ret %res; +} + +define @vselect_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: vselect_2: +; CHECK: sel p0.b, p0, p1.b, p2.b +; CHECK-NEXT: ret + %res = select %Pg, %Pn, %Pd + ret %res; +} + +define @and_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: and_16: +; CHECK: and p0.b, p0/z, p1.b, p2.b +; 
CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.and.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @and_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: and_8: +; CHECK: and p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.and.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @and_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: and_4: +; CHECK: and p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.and.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @and_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: and_2: +; CHECK: and p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.and.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + + +define @bic_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: bic_16: +; CHECK: bic p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.bic.pred.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @bic_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: bic_8: +; CHECK: bic p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.bic.pred.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @bic_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: bic_4: +; CHECK: bic p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.bic.pred.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @bic_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: bic_2: +; CHECK: bic p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.bic.pred.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @eor_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: eor_16: +; CHECK: eor p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.eor.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @eor_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: eor_8: +; CHECK: eor p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.eor.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @eor_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: eor_4: +; CHECK: eor p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.eor.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @eor_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: eor_2: +; CHECK: eor p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.eor.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @ands_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: ands_16: +; CHECK: ands p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ands.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @ands_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: ands_8: +; CHECK: ands p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ands.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @ands_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: ands_4: +; CHECK: ands p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ands.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @ands_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: ands_2: +; CHECK: ands p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.ands.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + + +define @bics_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: bics_16: +; CHECK: bics p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.bics.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @bics_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: bics_8: +; CHECK: bics p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.bics.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @bics_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: bics_4: +; CHECK: bics p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.bics.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define 
@bics_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: bics_2: +; CHECK: bics p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.bics.nxv2i1( %Pg, + %Pn, + %Pd) + ret %res; +} + + +define @eors_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: eors_16: +; CHECK: eors p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.eors.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @eors_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: eors_8: +; CHECK: eors p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.eors.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @eors_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: eors_4: +; CHECK: eors p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.eors.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @eors_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: eors_2: +; CHECK: eors p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.eors.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + + +define @orr_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orr_16: +; CHECK: orr p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orr.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orr_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orr_8: +; CHECK: orr p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orr.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orr_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orr_4: +; CHECK: orr p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orr.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orr_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orr_2: +; CHECK: orr p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orr.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + + +define @orn_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orn_16: +; CHECK: orn p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orn.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orn_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orn_8: +; CHECK: orn p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orn.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orn_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orn_4: +; CHECK: orn p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orn.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orn_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orn_2: +; CHECK: orn p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orn.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nor_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nor_16: +; CHECK: nor p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nor.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nor_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nor_8: +; CHECK: nor p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nor.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nor_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nor_4: +; CHECK: nor p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nor.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nor_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nor_2: +; CHECK: nor p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nor.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nand_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nand_16: +; CHECK: nand p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nand.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nand_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nand_8: +; CHECK: nand p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call 
@llvm.aarch64.sve.nand.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nand_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nand_4: +; CHECK: nand p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nand.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nand_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nand_2: +; CHECK: nand p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nand.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orrs_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orrs_16: +; CHECK: orrs p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orrs.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orrs_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orrs_8: +; CHECK: orrs p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orrs.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orrs_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orrs_4: +; CHECK: orrs p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orrs.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orrs_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orrs_2: +; CHECK: orrs p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orrs.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orns_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orns_16: +; CHECK: orns p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orns.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orns_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orns_8: +; CHECK: orns p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orns.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orns_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orns_4: +; CHECK: orns p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orns.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @orns_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: orns_2: +; CHECK: orns p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.orns.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nors_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nors_16: +; CHECK: nors p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nors.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nors_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nors_8: +; CHECK: nors p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nors.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nors_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nors_4: +; CHECK: nors p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nors.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nors_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nors_2: +; CHECK: nors p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nors.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nands_16( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nands_16: +; CHECK: nands p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nands.nxv16i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nands_8( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nands_8: +; CHECK: nands p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nands.nxv8i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nands_4( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nands_4: +; CHECK: nands p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nands.nxv4i1( %Pg, %Pn, %Pd) + ret %res; +} + +define @nands_2( %Pg, %Pn, %Pd) { +; CHECK-LABEL: nands_2: +; CHECK: nands p0.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.nands.nxv2i1( %Pg, %Pn, %Pd) + ret %res; +} + 
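+
+; A minimal illustrative sketch, not part of the patch above: the same
+; and.nxv16i1 intrinsic with its scalable predicate types written out in
+; full, inferred from the intrinsic's .nxv16i1 suffix. The expected code is
+; assumed to match the and_16 test earlier in this file.
+define <vscale x 16 x i1> @and_16_example(<vscale x 16 x i1> %Pg, <vscale x 16 x i1> %Pn, <vscale x 16 x i1> %Pd) {
+; CHECK-LABEL: and_16_example:
+; CHECK: and p0.b, p0/z, p1.b, p2.b
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.and.nxv16i1(<vscale x 16 x i1> %Pg, <vscale x 16 x i1> %Pn, <vscale x 16 x i1> %Pd)
+  ret <vscale x 16 x i1> %res
+}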
+declare @llvm.aarch64.sve.and.nxv16i1(, , ) +declare @llvm.aarch64.sve.and.nxv8i1(, , ) +declare @llvm.aarch64.sve.and.nxv4i1(, , ) +declare @llvm.aarch64.sve.and.nxv2i1(, , ) +declare @llvm.aarch64.sve.bic.pred.nxv16i1(, , ) +declare @llvm.aarch64.sve.bic.pred.nxv8i1(, , ) +declare @llvm.aarch64.sve.bic.pred.nxv4i1(, , ) +declare @llvm.aarch64.sve.bic.pred.nxv2i1(, , ) +declare @llvm.aarch64.sve.eor.nxv16i1(, , ) +declare @llvm.aarch64.sve.eor.nxv8i1(, , ) +declare @llvm.aarch64.sve.eor.nxv4i1(, , ) +declare @llvm.aarch64.sve.eor.nxv2i1(, , ) +declare @llvm.aarch64.sve.ands.nxv16i1(, , ) +declare @llvm.aarch64.sve.ands.nxv8i1(, , ) +declare @llvm.aarch64.sve.ands.nxv4i1(, , ) +declare @llvm.aarch64.sve.ands.nxv2i1(, , ) +declare @llvm.aarch64.sve.bics.nxv16i1(, , ) +declare @llvm.aarch64.sve.bics.nxv8i1(, , ) +declare @llvm.aarch64.sve.bics.nxv4i1(, , ) +declare @llvm.aarch64.sve.bics.nxv2i1(, , ) +declare @llvm.aarch64.sve.eors.nxv16i1(, , ) +declare @llvm.aarch64.sve.eors.nxv8i1(, , ) +declare @llvm.aarch64.sve.eors.nxv4i1(, , ) +declare @llvm.aarch64.sve.eors.nxv2i1(, , ) +declare @llvm.aarch64.sve.orr.nxv16i1(, , ) +declare @llvm.aarch64.sve.orr.nxv8i1(, , ) +declare @llvm.aarch64.sve.orr.nxv4i1(, , ) +declare @llvm.aarch64.sve.orr.nxv2i1(, , ) +declare @llvm.aarch64.sve.orn.nxv16i1(, , ) +declare @llvm.aarch64.sve.orn.nxv8i1(, , ) +declare @llvm.aarch64.sve.orn.nxv4i1(, , ) +declare @llvm.aarch64.sve.orn.nxv2i1(, , ) +declare @llvm.aarch64.sve.nor.nxv16i1(, , ) +declare @llvm.aarch64.sve.nor.nxv8i1(, , ) +declare @llvm.aarch64.sve.nor.nxv4i1(, , ) +declare @llvm.aarch64.sve.nor.nxv2i1(, , ) +declare @llvm.aarch64.sve.nand.nxv16i1(, , ) +declare @llvm.aarch64.sve.nand.nxv8i1(, , ) +declare @llvm.aarch64.sve.nand.nxv4i1(, , ) +declare @llvm.aarch64.sve.nand.nxv2i1(, , ) +declare @llvm.aarch64.sve.orrs.nxv16i1(, , ) +declare @llvm.aarch64.sve.orrs.nxv8i1(, , ) +declare @llvm.aarch64.sve.orrs.nxv4i1(, , ) +declare @llvm.aarch64.sve.orrs.nxv2i1(, , ) +declare @llvm.aarch64.sve.orns.nxv16i1(, , ) +declare @llvm.aarch64.sve.orns.nxv8i1(, , ) +declare @llvm.aarch64.sve.orns.nxv4i1(, , ) +declare @llvm.aarch64.sve.orns.nxv2i1(, , ) +declare @llvm.aarch64.sve.nors.nxv16i1(, , ) +declare @llvm.aarch64.sve.nors.nxv8i1(, , ) +declare @llvm.aarch64.sve.nors.nxv4i1(, , ) +declare @llvm.aarch64.sve.nors.nxv2i1(, , ) +declare @llvm.aarch64.sve.nands.nxv16i1(, , ) +declare @llvm.aarch64.sve.nands.nxv8i1(, , ) +declare @llvm.aarch64.sve.nands.nxv4i1(, , ) +declare @llvm.aarch64.sve.nands.nxv2i1(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll new file mode 100644 index 0000000000000..4d110fee41c9f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll @@ -0,0 +1,84 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s + +; +; FCVTLT +; + +define @fcvtlt_f32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtlt_f32_f16: +; CHECK: fcvtlt z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtlt.f32f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtlt_f64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtlt_f64_f32: +; CHECK: fcvtlt z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtlt.f64f32( %a, + %pg, + %b) + ret %out +} + +; +; FCVTNT +; + +define @fcvtnt_f16_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtnt_f16_f32: +; CHECK: fcvtnt z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtnt.f16f32( %a, + %pg, + %b) + ret %out +} + +define 
@fcvtnt_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtnt_f32_f64: +; CHECK: fcvtnt z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtnt.f32f64( %a, + %pg, + %b) + ret %out +} + +; +; FCVTX +; + +define @fcvtx_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtx_f32_f64: +; CHECK: fcvtx z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtx.f32f64( %a, + %pg, + %b) + ret %out +} + +; +; FCVTXNT +; + +define @fcvtxnt_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtxnt_f32_f64: +; CHECK: fcvtxnt z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtxnt.f32f64( %a, + %pg, + %b) + ret %out +} + +declare @llvm.aarch64.sve.fcvtlt.f32f16(, , ) +declare @llvm.aarch64.sve.fcvtlt.f64f32(, , ) +declare @llvm.aarch64.sve.fcvtnt.f16f32(, , ) +declare @llvm.aarch64.sve.fcvtnt.f32f64(, , ) +declare @llvm.aarch64.sve.fcvtx.f32f64(, , ) +declare @llvm.aarch64.sve.fcvtxnt.f32f64(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll new file mode 100644 index 0000000000000..fe12324a4e0a7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll @@ -0,0 +1,39 @@ +;RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s + +; +; FLOGB +; + +define @flogb_f16( %a, %pg, %b) { +; CHECK-LABEL: flogb_f16: +; CHECK: flogb z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.flogb.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @flogb_f32( %a, %pg, %b) { +; CHECK-LABEL: flogb_f32: +; CHECK: flogb z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.flogb.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @flogb_f64( %a, %pg, %b) { +; CHECK-LABEL: flogb_f64: +; CHECK: flogb z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.flogb.nxv2f64( %a, + %pg, + %b) + ret %out +} + +declare @llvm.aarch64.sve.flogb.nxv8f16(, , ) +declare @llvm.aarch64.sve.flogb.nxv4f32(, , ) +declare @llvm.aarch64.sve.flogb.nxv2f64(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll new file mode 100644 index 0000000000000..12cc12ccadfc2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll @@ -0,0 +1,127 @@ +;RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s + +; +; FMLALB (Vectors) +; + +define @fmlalb_h( %a, %b, %c) { +; CHECK-LABEL: fmlalb_h: +; CHECK: fmlalb z0.s, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmlalb.nxv4f32( %a, + %b, + %c) + ret %out +} + +; +; FMLALB (Indexed) +; + +define @fmlalb_lane_h( %a, %b, %c) { +; CHECK-LABEL: fmlalb_lane_h: +; CHECK: fmlalb z0.s, z1.h, z2.h[0] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmlalb.lane.nxv4f32( %a, + %b, + %c, + i32 0) + ret %out +} + +; +; FMLALT (Vectors) +; + +define @fmlalt_h( %a, %b, %c) { +; CHECK-LABEL: fmlalt_h: +; CHECK: fmlalt z0.s, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmlalt.nxv4f32( %a, + %b, + %c) + ret %out +} + +; +; FMLALT (Indexed) +; + +define @fmlalt_lane_h( %a, %b, %c) { +; CHECK-LABEL: fmlalt_lane_h: +; CHECK: fmlalt z0.s, z1.h, z2.h[1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmlalt.lane.nxv4f32( %a, + %b, + %c, + i32 1) + ret %out +} + +; +; FMLSLB (Vectors) +; + +define @fmlslb_h( %a, %b, %c) { +; CHECK-LABEL: fmlslb_h: +; CHECK: fmlslb z0.s, z1.h, z2.h +; CHECK-NEXT: ret + %out = call 
@llvm.aarch64.sve.fmlslb.nxv4f32( %a, + %b, + %c) + ret %out +} + +; +; FMLSLB (Indexed) +; + +define @fmlslb_lane_h( %a, %b, %c) { +; CHECK-LABEL: fmlslb_lane_h: +; CHECK: fmlslb z0.s, z1.h, z2.h[2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmlslb.lane.nxv4f32( %a, + %b, + %c, + i32 2) + ret %out +} + +; +; FMLSLT (Vectors) +; + +define @fmlslt_h( %a, %b, %c) { +; CHECK-LABEL: fmlslt_h: +; CHECK: fmlslt z0.s, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmlslt.nxv4f32( %a, + %b, + %c) + ret %out +} + +; +; FMLSLT (Indexed) +; + +define @fmlslt_lane_h( %a, %b, %c) { +; CHECK-LABEL: fmlslt_lane_h: +; CHECK: fmlslt z0.s, z1.h, z2.h[3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmlslt.lane.nxv4f32( %a, + %b, + %c, + i32 3) + ret %out +} + +declare @llvm.aarch64.sve.fmlalb.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmlalb.lane.nxv4f32(, , , i32) +declare @llvm.aarch64.sve.fmlalt.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmlalt.lane.nxv4f32(, , , i32) + +declare @llvm.aarch64.sve.fmlslb.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmlslb.lane.nxv4f32(, , , i32) +declare @llvm.aarch64.sve.fmlslt.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmlslt.lane.nxv4f32(, , , i32) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll new file mode 100644 index 0000000000000..055c24b935e08 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll @@ -0,0 +1,191 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s + +; +; FADDP +; + +define @faddp_f16( %pg, %a, %b) { +; CHECK-LABEL: faddp_f16: +; CHECK: faddp z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.faddp.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @faddp_f32( %pg, %a, %b) { +; CHECK-LABEL: faddp_f32: +; CHECK: faddp z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.faddp.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @faddp_f64( %pg, %a, %b) { +; CHECK-LABEL: faddp_f64: +; CHECK: faddp z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.faddp.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FMAXP +; + +define @fmaxp_f16( %pg, %a, %b) { +; CHECK-LABEL: fmaxp_f16: +; CHECK: fmaxp z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxp.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fmaxp_f32( %pg, %a, %b) { +; CHECK-LABEL: fmaxp_f32: +; CHECK: fmaxp z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxp.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fmaxp_f64( %pg, %a, %b) { +; CHECK-LABEL: fmaxp_f64: +; CHECK: fmaxp z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxp.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FMAXNMP +; + +define @fmaxnmp_f16( %pg, %a, %b) { +; CHECK-LABEL: fmaxnmp_f16: +; CHECK: fmaxnmp z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnmp.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fmaxnmp_f32( %pg, %a, %b) { +; CHECK-LABEL: fmaxnmp_f32: +; CHECK: fmaxnmp z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnmp.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fmaxnmp_f64( %pg, %a, %b) { +; CHECK-LABEL: fmaxnmp_f64: +; CHECK: fmaxnmp z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fmaxnmp.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FMINP +; + +define @fminp_f16( %pg, %a, %b) { +; CHECK-LABEL: 
fminp_f16: +; CHECK: fminp z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminp.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fminp_f32( %pg, %a, %b) { +; CHECK-LABEL: fminp_f32: +; CHECK: fminp z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminp.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fminp_f64( %pg, %a, %b) { +; CHECK-LABEL: fminp_f64: +; CHECK: fminp z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminp.nxv2f64( %pg, + %a, + %b) + ret %out +} + +; +; FMINNMP +; + +define @fminnmp_f16( %pg, %a, %b) { +; CHECK-LABEL: fminnmp_f16: +; CHECK: fminnmp z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnmp.nxv8f16( %pg, + %a, + %b) + ret %out +} + +define @fminnmp_f32( %pg, %a, %b) { +; CHECK-LABEL: fminnmp_f32: +; CHECK: fminnmp z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnmp.nxv4f32( %pg, + %a, + %b) + ret %out +} + +define @fminnmp_f64( %pg, %a, %b) { +; CHECK-LABEL: fminnmp_f64: +; CHECK: fminnmp z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fminnmp.nxv2f64( %pg, + %a, + %b) + ret %out +} + +declare @llvm.aarch64.sve.faddp.nxv8f16(, , ) +declare @llvm.aarch64.sve.faddp.nxv4f32(, , ) +declare @llvm.aarch64.sve.faddp.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmaxp.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmaxp.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmaxp.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmaxnmp.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmaxnmp.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmaxnmp.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fminp.nxv8f16(, , ) +declare @llvm.aarch64.sve.fminp.nxv4f32(, , ) +declare @llvm.aarch64.sve.fminp.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fminnmp.nxv8f16(, , ) +declare @llvm.aarch64.sve.fminnmp.nxv4f32(, , ) +declare @llvm.aarch64.sve.fminnmp.nxv2f64(, , ) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir new file mode 100644 index 0000000000000..54849b4c651d1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir @@ -0,0 +1,943 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: mfma_f32_32x32x1f32_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x1f32_vva + ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... + +--- +name: mfma_f32_32x32x1f32_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x1f32_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... 
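+
+# In the _vva tests the scalar inputs already live in VGPRs and the
+# accumulator in AGPRs, so no cross-bank copies are needed. In the _sss
+# tests every operand starts out in SGPRs, which the MFMA cannot read
+# directly; the checks show regbankselect inserting one vgpr COPY per
+# scalar source and an agpr COPY for the accumulator tuple ahead of the
+# G_INTRINSIC. The same pattern repeats for each shape below.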
+
+---
+name: mfma_f32_16x16x1f32_vva
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+
+    ; CHECK-LABEL: name: mfma_f32_16x16x1f32_vva
+    ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
+...
+
+---
+name: mfma_f32_16x16x1f32_sss
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+
+    ; CHECK-LABEL: name: mfma_f32_16x16x1f32_sss
+    ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
+    ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
+    %0:_(s32) = COPY $sgpr32
+    %1:_(s32) = COPY $sgpr33
+    %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
+...
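+
+# The accumulator tuple tracks the intrinsic shape: 32x32x1f32 above uses
+# <32 x s32> across 32 registers, 16x16x1f32 uses <16 x s32>, and the
+# 4x4x1f32 tests below use <4 x s32>. Only the tuple width changes; the
+# vva/sss copy behaviour is identical for each shape.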
+ +--- +name: mfma_f32_4x4x1f32_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x1f32_vva + ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_4x4x1f32_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x1f32_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_32x32x2f32_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x2f32_vva + ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
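(Side note, grounded only in the checks themselves: a larger K for the same 32x32 tile shrinks the accumulator, so the 32x32x1f32 cases carry the accumulator as <32 x s32> while the 32x32x2f32 cases use <16 x s32>. The corresponding declaration, as a sketch rather than a quote from the test:

    declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg)
)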
+ +--- +name: mfma_f32_32x32x2f32_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x2f32_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... + +--- +name: mfma_f32_16x16x4f32_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x4f32_vva + ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... 
+ +--- +name: mfma_f32_16x16x4f32_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x4f32_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_32x32x4f16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x4f16_vva + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... 
+ +--- +name: mfma_f32_32x32x4f16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x4f16_sss + ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 + %2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... 
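(The f16 variants take the A and B inputs as 64-bit packed half vectors, which is why the checks above copy them as <4 x s16> out of register pairs. A minimal IR-level sketch, with %a, %b and %acc as placeholder values assumed to be defined earlier:

    declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half>, <4 x half>, <32 x float>, i32 immarg, i32 immarg, i32 immarg)

    ; %a and %b are 64-bit packed <4 x half> operands
    %r = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %a, <4 x half> %b, <32 x float> %acc, i32 0, i32 0, i32 0)
)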
+ +--- +name: mfma_f32_16x16x4f16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_f32_16x16x4f16_vva + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... + +--- +name: mfma_f32_16x16x4f16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_f32_16x16x4f16_sss + ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_4x4x4f16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x4f16_vva + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_4x4x4f16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x4f16_sss + ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... 
+ +--- +name: mfma_f32_32x32x8f16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x8f16_vva + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... + +--- +name: mfma_f32_32x32x8f16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x8f16_sss + ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_16x16x16f16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x16f16_vva + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_16x16x16f16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x16f16_sss + ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... 
+ +--- +name: mfma_i32_32x32x4i8_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + + ; CHECK-LABEL: name: mfma_i32_32x32x4i8_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr2 + %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... 
+ +--- +name: mfma_i32_32x32x4i8_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + + ; CHECK-LABEL: name: mfma_i32_32x32x4i8_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... 
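(The integer MFMA variants pack four i8 elements of A and of B into one i32 operand each and accumulate in i32 lanes, matching the s32 and <32 x s32> types above. Sketch with placeholder names:

    declare <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32, i32, <32 x i32>, i32 immarg, i32 immarg, i32 immarg)

    ; %a and %b each carry four packed i8 values
    %r = call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %a, i32 %b, <32 x i32> %acc, i32 0, i32 0, i32 0)
)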
+ +--- +name: mfma_i32_16x16x4i8_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_i32_16x16x4i8_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr2 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... + +--- +name: mfma_i32_16x16x4i8_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_i32_16x16x4i8_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_i32_4x4x4i8_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_i32_4x4x4i8_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr2 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_i32_4x4x4i8_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_i32_4x4x4i8_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_i32_32x32x8i8_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_i32_32x32x8i8_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr2 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_i32_32x32x8i8_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_i32_32x32x8i8_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... + +--- +name: mfma_i32_16x16x16i8_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_i32_16x16x16i8_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr2 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... 
+ +--- +name: mfma_i32_16x16x16i8_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_i32_16x16x16i8_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_32x32x2bf16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x2bf16_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... 
+ +--- +name: mfma_f32_32x32x2bf16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x2bf16_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(<2 x s16>) = COPY $sgpr32 + %1:_(<2 x s16>) = COPY $sgpr33 + %2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... 
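(The bf16 variants pass their inputs as <2 x i16> bit patterns rather than a dedicated bfloat vector type, which matches the <2 x s16> copies above, including the single-SGPR sss sources. Sketch with placeholder names:

    declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16>, <2 x i16>, <32 x float>, i32 immarg, i32 immarg, i32 immarg)

    ; %a and %b each carry two bfloat16 values as raw i16 bits
    %r = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %a, <2 x i16> %b, <32 x float> %acc, i32 0, i32 0, i32 0)
)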
+ +--- +name: mfma_f32_16x16x2bf16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_f32_16x16x2bf16_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... + +--- +name: mfma_f32_16x16x2bf16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_f32_16x16x2bf16_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<2 x s16>) = COPY $sgpr32 + %1:_(<2 x s16>) = COPY $sgpr33 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_4x4x2bf16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x2bf16_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_4x4x2bf16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x2bf16_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<2 x s16>) = COPY $sgpr32 + %1:_(<2 x s16>) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_32x32x4bf16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x4bf16_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_32x32x4bf16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x4bf16_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<2 x s16>) = COPY $sgpr32 + %1:_(<2 x s16>) = COPY $sgpr33 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... + +--- +name: mfma_f32_16x16x8bf16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x8bf16_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... 
+ +--- +name: mfma_f32_16x16x8bf16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x8bf16_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<2 x s16>) = COPY $sgpr32 + %1:_(<2 x s16>) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll index 0dec67ad340cd..895539c00bce9 100644 --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -16,29 +16,28 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x ; CHECK-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: s_mov_b64 s[2:3], 0 ; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7 -; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 +; CHECK-NEXT: ; implicit-def: $sgpr4_sgpr5 ; CHECK-NEXT: s_branch BB0_3 ; CHECK-NEXT: BB0_1: ; %Flow1 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] -; CHECK-NEXT: s_mov_b64 s[10:11], 0 +; CHECK-NEXT: s_mov_b64 s[8:9], 0 ; CHECK-NEXT: BB0_2: ; %Flow ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7] -; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] -; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_and_b64 s[4:5], s[10:11], exec -; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] -; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9] -; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7] +; CHECK-NEXT: s_or_b64 s[2:3], s[10:11], s[2:3] +; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec +; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3] ; CHECK-NEXT: s_cbranch_execz BB0_6 ; CHECK-NEXT: BB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec ; CHECK-NEXT: s_cmp_lt_u32 s0, 32 -; CHECK-NEXT: s_mov_b64 s[10:11], -1 +; CHECK-NEXT: s_mov_b64 s[8:9], -1 ; CHECK-NEXT: s_cbranch_scc0 BB0_2 ; CHECK-NEXT: ; %bb.4: ; %endif1 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 @@ -53,9 +52,9 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1 ; CHECK-NEXT: s_branch BB0_1 ; CHECK-NEXT: BB0_6: ; %Flow2 -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_or_b64 exec, exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3] +; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5] ; CHECK-NEXT: 
; mask branch BB0_8 ; CHECK-NEXT: BB0_7: ; %if1 ; CHECK-NEXT: v_sqrt_f32_e32 v1, v0 @@ -63,6 +62,7 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm ; CHECK-NEXT: s_endpgm + ; this is the divergent branch with the condition not marked as divergent start: %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0) diff --git a/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir new file mode 100644 index 0000000000000..754536577faec --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir @@ -0,0 +1,71 @@ +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: test_part_fold{{$}} +# GCN: %2:sreg_32 = S_ADD_I32 70, %1 +--- +name: test_part_fold +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 70 + %1:sreg_32 = S_MOV_B32 80 + %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc +... + +# GCN-LABEL: name: test_inline_const{{$}} +# GCN: %2:sreg_32 = S_ADD_I32 70, 63 +--- +name: test_inline_const +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 70 + %1:sreg_32 = S_MOV_B32 63 + %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc +... +# GCN-LABEL: name: test_obscure{{$}} +# GCN: %2:sreg_32 = S_LSHL2_ADD_U32 70, %1 +--- +name: test_obscure +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 70 + %1:sreg_32 = S_MOV_B32 80 + %2:sreg_32 = S_LSHL2_ADD_U32 %0, %1, implicit-def $scc +... +# GCN-LABEL: name: test_obscure_inline{{$}} +# GCN: %2:sreg_32 = S_LSHL2_ADD_U32 70, 63 +--- +name: test_obscure_inline +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 70 + %1:sreg_32 = S_MOV_B32 63 + %2:sreg_32 = S_LSHL2_ADD_U32 %0, %1, implicit-def $scc +... +# GCN-LABEL: name: test_frameindex{{$}} +# GCN: %1:sreg_32 = S_ADD_I32 %stack.0, %0 +--- +name: test_frameindex +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16} +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 70 + %1:sreg_32 = S_ADD_I32 %stack.0, %0, implicit-def $scc +... +# GCN-LABEL: name: test_frameindex_inline{{$}} +# GCN: %1:sreg_32 = S_ADD_I32 %stack.0, 63 +--- +name: test_frameindex_inline +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16} +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 63 + %1:sreg_32 = S_ADD_I32 %stack.0, %0, implicit-def $scc +... 
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll index fff1c22918ec6..51d1c091ab913 100644 --- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll @@ -3,11 +3,10 @@ ; SI-LABEL: {{^}}i1_copy_from_loop: ; -; SI: [[LOOP:BB0_[0-9]+]]: ; %Flow1 -; SI: s_or_b64 exec, exec, [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]] ; SI: ; %Flow +; SI: s_or_b64 [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]] ; SI: s_and_b64 [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_MASK:s\[[0-9]+:[0-9]+\]]], exec -; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], s[6:7], [[ACCUM_MASK]] +; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[ACCUM_MASK]] ; SI: s_cbranch_execz [[FOR_END_LABEL:BB0_[0-9]+]] ; SI: ; %for.body diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll index 7b34d873f7a74..25742666a5794 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll @@ -1297,8 +1297,30 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_vecarg: ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 -; GCN-COUNT-8: global_load_dwordx4 -; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-COUNT-32: v_accvgpr_read_b32 ; GCN-COUNT-8: global_store_dwordx4 diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll index 46c4b1e6b3a1c..684b183de690c 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_break.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll @@ -40,10 +40,9 @@ ; GCN: [[FLOW]]: ; %Flow ; GCN: ; in Loop: Header=BB0_1 Depth=1 -; GCN: s_and_b64 [[BROKEN_MASK]], exec, [[INNER_MASK]] -; GCN: s_or_b64 [[BROKEN_MASK]], [[BROKEN_MASK]], [[ACCUM_MASK]] -; GCN: s_mov_b64 [[ACCUM_MASK]], [[BROKEN_MASK]] -; GCN: s_andn2_b64 exec, exec, [[BROKEN_MASK]] +; GCN: s_and_b64 [[AND_MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]] +; GCN-NEXT: s_or_b64 [[ACCUM_MASK]], [[AND_MASK]], [[ACCUM_MASK]] +; GCN-NEXT: s_andn2_b64 exec, exec, [[ACCUM_MASK]] ; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]] ; GCN: ; %bb.4: ; %bb9 diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll index 
08d8ec0fba4dc..5222ae56db87a 100644 --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -25,22 +25,20 @@ ; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2 -; GCN: s_or_b64 exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 exec, exec, [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]] ; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]] -; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] -; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] -; GCN: s_andn2_b64 exec, exec, [[TMP1]] +; GCN: s_or_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[LEFT_OUTER]] +; GCN: s_andn2_b64 exec, exec, [[LEFT_OUTER]] ; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]] ; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}} -; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: s_mov_b64 [[LEFT_INNER]], 0{{$}} ; GCN: ; %Flow ; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]] -; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]] -; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] -; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] -; GCN: s_andn2_b64 exec, exec, [[TMP0]] +; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 [[LEFT_INNER]], [[TMP0]], [[LEFT_INNER]] +; GCN: s_andn2_b64 exec, exec, [[LEFT_INNER]] ; GCN: s_cbranch_execz [[FLOW2]] ; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}} @@ -82,17 +80,17 @@ ENDIF: ; preds = %LOOP ; OPT: llvm.amdgcn.end.cf ; GCN-LABEL: {{^}}multi_if_break_loop: -; GCN: s_mov_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: s_mov_b64 [[SAVED_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %Flow4 -; GCN: s_and_b64 [[BROKEN_THREADS_MASK]], exec, [[BROKEN_THREADS_MASK]] -; GCN: s_or_b64 [[BROKEN_THREADS_MASK]], [[BROKEN_THREADS_MASK]], [[SAVED:s\[[0-9]+:[0-9]+\]]] -; GCN: s_andn2_b64 exec, exec, [[BROKEN_THREADS_MASK]] +; GCN: s_and_b64 [[ANDTMP0:s\[[0-9]+:[0-9]+\]]], exec, {{s\[[0-9]+:[0-9]+\]}} +; GCN: s_or_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], [[ANDTMP0]], [[SAVED_MASK]] +; GCN: s_and_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, exec +; GCN: s_andn2_b64 exec, exec, [[MASK1]] ; GCN-NEXT: s_cbranch_execz [[LOOP_EXIT:BB[0-9]+_[0-9]+]] ; GCN: ; %bb1{{$}} ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], -; GCN: s_mov_b64 [[SAVED]], [[BROKEN_THREADS_MASK]] ; GCN: ; %LeafBlock1 ; GCN: v_cmp_eq_u32_e32 vcc, 1, [[LOAD0]] @@ -122,7 +120,7 @@ ENDIF: ; preds = %LOOP ; GCN: s_branch [[LOOP]] ; GCN: [[LOOP_EXIT]]: ; %Flow6 -; GCN: s_or_b64 exec, exec, [[BROKEN_THREADS_MASK]] +; GCN: s_or_b64 exec, exec, [[SAVED_MASK]] define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 { bb: diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir index dc7a7c804bee1..1c7adc39fe290 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -91,3 +91,86 @@ body: | $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit 
$sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr31 S_ENDPGM 0, implicit $vcc ... + +# When only one 64-bit SGPR is available for the unused carry out pre gfx9, +# we must reuse one of the 32-bit SGPR sub-regs to materialize the offset. + +--- +name: scavenge_sgpr_pei_one_sgpr_64 +tracksRegLiveness: true + +stack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 1, type: default, offset: 0, size: 4, alignment: 8192 } + +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr34 + frameOffsetReg: $sgpr33 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr1 + + ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64 + ; CHECK: liveins: $vgpr1 + ; CHECK: $sgpr27 = frame-setup COPY $sgpr33 + ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc + ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc + ; CHECK: $sgpr28 = S_SUB_U32 $sgpr33, $sgpr34, implicit-def $scc + ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, killed $sgpr28, implicit $exec + ; CHECK: $sgpr28 = S_MOV_B32 8192 + ; CHECK: $vgpr2, dead $sgpr28_sgpr29 = V_ADD_I32_e64 killed $sgpr28, killed $vgpr3, 0, implicit $exec + ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 + ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup COPY $sgpr27 + ; CHECK: S_ENDPGM 0, implicit $vcc + S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc + $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit 
$sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 + S_ENDPGM 0, implicit $vcc +... + +# Prefer to use vcc as unused carry out. + +--- +name: scavenge_sgpr_pei_prefer_vcc +tracksRegLiveness: true + +stack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 1, type: default, offset: 0, size: 4, alignment: 8192 } + +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr34 + frameOffsetReg: $sgpr33 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr1 + + ; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc + ; CHECK: liveins: $vgpr1 + ; CHECK: $sgpr27 = frame-setup COPY $sgpr33 + ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc + ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31 + ; CHECK: $vcc_hi = S_SUB_U32 $sgpr33, $sgpr34, implicit-def $scc + ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, killed $vcc_hi, implicit $exec + ; CHECK: $vcc_lo = S_MOV_B32 8192 + ; CHECK: $vgpr2, dead $vcc = V_ADD_I32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec + ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 + ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup COPY $sgpr27 + ; CHECK: S_ENDPGM 0 + S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31 + $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, 
implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll index 14d78fbef29ea..23bb18e738f54 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -37,9 +37,8 @@ ENDIF: ; SI: ; %endif ; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: ; %loop -; SI: s_mov_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[LEFT]] ; SI: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[PHI]] -; SI: s_or_b64 [[LEFT]], [[TMP1]], [[TMP]] +; SI: s_or_b64 [[LEFT]], [[TMP1]], [[LEFT]] ; SI: s_andn2_b64 exec, exec, [[LEFT]] ; SI: s_cbranch_execnz [[LOOP_LABEL]] ; SI: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll index ef17825024eda..ea74268dbe7c2 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll @@ -223,9 +223,8 @@ exit: ; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]] ; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]] ; SI-NEXT: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], -; SI-NEXT: s_or_b64 [[TMP2:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[COND_STATE]] -; SI-NEXT: s_mov_b64 [[COND_STATE]], [[TMP2]] -; SI-NEXT: s_andn2_b64 exec, exec, [[TMP2]] +; SI-NEXT: s_or_b64 [[COND_STATE]], [[TMP1]], [[COND_STATE]] +; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]] ; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]] ; SI: [[LABEL_EXIT]]: diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 91a993181979d..92808fec360f4 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -243,14 +243,12 @@ bb13: ; GFX1032: s_or_b32 [[MASK1]], [[MASK1]], [[MASK0]] ; GFX1064: s_or_b64 [[MASK1]], [[MASK1]], [[MASK0]] ; GCN: BB{{.*}}: ; %Flow -; GFX1032: s_and_b32 [[MASK0:s[0-9]+]], exec_lo, [[MASK1]] -; GFX1064: s_and_b64 [[MASK0:s\[[0-9:]+\]]], exec, [[MASK1]] -; GFX1032: s_or_b32 [[MASK0]], [[MASK0]], [[ACC:s[0-9]+]] -; GFX1064: s_or_b64 [[MASK0]], [[MASK0]], [[ACC:s\[[0-9:]+\]]] -; GFX1032: s_mov_b32 [[ACC]], [[MASK0]] -; GFX1064: s_mov_b64 [[ACC]], [[MASK0]] -; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[MASK0]] -; GFX1064: s_andn2_b64 exec, exec, [[MASK0]] +; GFX1032: s_and_b32 [[TMP0:s[0-9]+]], exec_lo, [[MASK1]] +; GFX1064: s_and_b64 [[TMP0:s\[[0-9:]+\]]], exec, [[MASK1]] +; GFX1032: s_or_b32 [[ACC:s[0-9]+]], [[TMP0]], [[ACC]] +; GFX1064: s_or_b64 [[ACC:s\[[0-9:]+\]]], [[TMP0]], [[ACC]] +; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[ACC]] +; GFX1064: s_andn2_b64 exec, exec, [[ACC]] ; GCN: s_cbranch_execz ; GCN: BB{{.*}}: ; GCN: s_load_dword [[LOAD:s[0-9]+]] diff --git a/llvm/test/CodeGen/ARM/CGP/arm-cgp-calls.ll b/llvm/test/CodeGen/ARM/CGP/arm-cgp-calls.ll deleted file mode 100644 index 8d58c8e69a556..0000000000000 --- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-calls.ll +++ /dev/null @@ -1,230 +0,0 @@ -; RUN: llc -mtriple=thumbv8 -arm-disable-cgp=false %s -o - | FileCheck %s -; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s - -; Check that the pass doesn't try to promote the immediate parameters. -; CHECK-LABEL: call_with_imms -; CHECK-NOT: uxt -define i8 @call_with_imms(i8* %arg) { - %call = tail call arm_aapcs_vfpcc zeroext i8 @dummy2(i8* nonnull %arg, i8 zeroext 0, i8 zeroext 0) - %cmp = icmp eq i8 %call, 0 - %res = select i1 %cmp, i8 %call, i8 1 - ret i8 %res -} - -; Test that the call result is still extended. 
-; CHECK-LABEL: test_call: -; CHECK: bl -; CHECK-NEXT: sxtb r1, r0 -define i16 @test_call(i8 zeroext %arg) { - %call = call i8 @dummy_i8(i8 %arg) - %cmp = icmp ult i8 %call, 128 - %conv = zext i1 %cmp to i16 - ret i16 %conv -} - -; CHECK-LABEL: promote_i8_sink_i16_1 -; CHECK: bl dummy_i8 -; CHECK: add{{.*}} r0, #1 -; CHECK-NOT: uxt -; CHECK: cmp r0 -define i16 @promote_i8_sink_i16_1(i8 zeroext %arg0, i16 zeroext %arg1, i16 zeroext %arg2) { - %call = tail call zeroext i8 @dummy_i8(i8 %arg0) - %add = add nuw i8 %call, 1 - %conv = zext i8 %add to i16 - %cmp = icmp ne i16 %conv, %arg1 - %sel = select i1 %cmp, i16 %arg1, i16 %arg2 - %res = tail call zeroext i16 @dummy3(i16 %sel) - ret i16 %res -} - -; CHECK-LABEL: promote_i8_sink_i16_2 -; CHECK: bl dummy_i8 -; CHECK: add{{.*}} r0, #1 -; CHECK-NOT: uxt -; CHECK: cmp r0 -define i16 @promote_i8_sink_i16_2(i8 zeroext %arg0, i8 zeroext %arg1, i16 zeroext %arg2) { - %call = tail call zeroext i8 @dummy_i8(i8 %arg0) - %add = add nuw i8 %call, 1 - %cmp = icmp ne i8 %add, %arg1 - %conv = zext i8 %arg1 to i16 - %sel = select i1 %cmp, i16 %conv, i16 %arg2 - %res = tail call zeroext i16 @dummy3(i16 %sel) - ret i16 %res -} - -@uc = global i8 42, align 1 -@LL = global i64 0, align 8 - -; CHECK-LABEL: zext_i64 -; CHECK: ldrb -; CHECK: strd -define void @zext_i64() { -entry: - %0 = load i8, i8* @uc, align 1 - %conv = zext i8 %0 to i64 - store i64 %conv, i64* @LL, align 8 - %cmp = icmp eq i8 %0, 42 - %conv1 = zext i1 %cmp to i32 - %call = tail call i32 bitcast (i32 (...)* @assert to i32 (i32)*)(i32 %conv1) - ret void -} - -@a = global i16* null, align 4 -@b = global i32 0, align 4 - -; CHECK-LABEL: constexpr -; CHECK: uxth -define i32 @constexpr() { -entry: - store i32 ptrtoint (i32* @b to i32), i32* @b, align 4 - %0 = load i16*, i16** @a, align 4 - %1 = load i16, i16* %0, align 2 - %or = or i16 %1, ptrtoint (i32* @b to i16) - store i16 %or, i16* %0, align 2 - %cmp = icmp ne i16 %or, 4 - %conv3 = zext i1 %cmp to i32 - %call = tail call i32 bitcast (i32 (...)* @e to i32 (i32)*)(i32 %conv3) #2 - ret i32 undef -} - -; The call to safe_lshift_func takes two parameters, but they're the same value, -; just one is zext. We do support zext now, so the transformation should -; trigger and we don't want to see uxtb here.
-; CHECK-LABEL: call_zext_i8_i32 -; CHECK-NOT: uxt -define fastcc i32 @call_zext_i8_i32(i32 %p_45, i8 zeroext %p_46) { -for.cond8.preheader: - %call217 = call fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 zeroext undef) - %tobool219 = icmp eq i8 %call217, 0 - br i1 %tobool219, label %for.end411, label %for.cond273.preheader - -for.cond273.preheader: ; preds = %for.cond8.preheader - %call217.lcssa = phi i8 [ %call217, %for.cond8.preheader ] - %conv218.le = zext i8 %call217.lcssa to i32 - %call346 = call fastcc zeroext i8 @safe_lshift_func(i8 zeroext %call217.lcssa, i32 %conv218.le) - unreachable - -for.end411: ; preds = %for.cond8.preheader - %call452 = call fastcc i64 @safe_sub_func_int64_t_s_s(i64 undef, i64 4) - unreachable -} - -%struct.anon = type { i32 } - -@g_57 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4 -@g_893 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4 -@g_82 = hidden local_unnamed_addr global i32 0, align 4 - -; Test that the transform bails on finding %conv4, a trunc -; CHECK-LABEL: call_return_pointer -; CHECK: sxth -; CHECK: uxt -define hidden i32 @call_return_pointer(i8 zeroext %p_13) local_unnamed_addr #0 { -entry: - %conv1 = zext i8 %p_13 to i16 - %call = tail call i16** @func_62(i8 zeroext undef, i32 undef, i16 signext %conv1, i32* undef) - %0 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @g_893, i32 0, i32 0), align 4 - %conv2 = trunc i32 %0 to i16 - br label %for.cond - -for.cond: ; preds = %for.cond.backedge, %entry - %p_13.addr.0 = phi i8 [ %p_13, %entry ], [ %p_13.addr.0.be, %for.cond.backedge ] - %tobool = icmp eq i8 %p_13.addr.0, 0 - br i1 %tobool, label %for.cond.backedge, label %if.then - -for.cond.backedge: ; preds = %for.cond, %if.then - %p_13.addr.0.be = phi i8 [ %conv4, %if.then ], [ 0, %for.cond ] - br label %for.cond - -if.then: ; preds = %for.cond - %call3 = tail call fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %conv2) - %conv4 = trunc i16 %call3 to i8 - br label %for.cond.backedge -} - -; Check that d.sroa.0.0.be is promoted and passed directly into the tail call.
-; CHECK-LABEL: check_zext_phi_call_arg -; CHECK-NOT: uxt -define i32 @check_zext_phi_call_arg() { -entry: - br label %for.cond - -for.cond: ; preds = %for.cond.backedge, %entry - %d.sroa.0.0 = phi i16 [ 30, %entry ], [ %d.sroa.0.0.be, %for.cond.backedge ] - %tobool = icmp eq i16 %d.sroa.0.0, 0 - br i1 %tobool, label %for.cond.backedge, label %if.then - -for.cond.backedge: ; preds = %for.cond, %if.then - %d.sroa.0.0.be = phi i16 [ %call, %if.then ], [ 0, %for.cond ] - br label %for.cond - -if.then: ; preds = %for.cond - %d.sroa.0.0.insert.ext = zext i16 %d.sroa.0.0 to i32 - %call = tail call zeroext i16 bitcast (i16 (...)* @f to i16 (i32)*)(i32 %d.sroa.0.0.insert.ext) #2 - br label %for.cond.backedge -} - -%struct.atomic_flag = type { i8 } - -; CHECK-LABEL: atomic_flag_test_and_set -; CHECK-NOT: uxt -define zeroext i1 @atomic_flag_test_and_set(%struct.atomic_flag* %object) { -entry: - %_Value = getelementptr inbounds %struct.atomic_flag, %struct.atomic_flag* %object, i32 0, i32 0 - %call = tail call arm_aapcscc zeroext i8 @__atomic_exchange_1(i8* %_Value, i8 zeroext 1, i32 5) #1 - %0 = and i8 %call, 1 - %tobool = icmp ne i8 %0, 0 - ret i1 %tobool -} - -; CHECK-LABEL: i1_zeroext_call -; CHECK: uxt -define i1 @i1_zeroext_call(i16* %ts, i32 %a, i16* %b, i8* %c) { -entry: - %0 = load i16, i16* %ts, align 2 - %conv.i860 = trunc i32 %a to i16 - store i16 %conv.i860, i16* %b, align 2 - %call.i848 = call zeroext i1 @i1_zeroext(i8* %c, i32 64, i16 zeroext %conv.i860) - br i1 %call.i848, label %if.then223, label %if.else227 - -if.then223: - %cmp235 = icmp eq i16 %0, %conv.i860 - br label %exit - -if.else227: - %cmp236 = icmp ult i16 %0, %conv.i860 - br label %exit - -exit: - %retval = phi i1 [ %cmp235, %if.then223 ], [ %cmp236, %if.else227 ] - ret i1 %retval -} - -; CHECK-LABEL: promote_arg_pass_to_call -; CHECK: uxtb -define i16 @promote_arg_pass_to_call(i16 zeroext %arg1, i16 zeroext %arg2) { - %conv = add nuw i16 %arg1, 15 - %mul = mul nuw nsw i16 %conv, 3 - %cmp = icmp ult i16 %mul, %arg2 - %trunc = trunc i16 %arg1 to i8 - %res = call zeroext i16 @dummy4(i1 %cmp, i8 %trunc, i16 %arg1) - ret i16 %res -} - - -declare i32 @assert(...) -declare i8 @dummy_i8(i8) -declare i8 @dummy2(i8*, i8, i8) -declare i16 @dummy3(i16) -declare i16 @dummy4(i1, i8, i16) - -declare dso_local i32 @e(...) local_unnamed_addr #1 -declare dso_local zeroext i16 @f(...) 
local_unnamed_addr #1 -declare dso_local arm_aapcscc i8 @__atomic_exchange_1(i8*, i8, i32) local_unnamed_addr - -declare noalias i16** @func_62(i8 zeroext %p_63, i32 %p_64, i16 signext %p_65, i32* nocapture readnone %p_66) -declare fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %si2) -declare dso_local fastcc i64 @safe_sub_func_int64_t_s_s(i64, i64) -declare dso_local fastcc zeroext i8 @safe_lshift_func(i8 zeroext, i32) -declare dso_local fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 returned zeroext) -declare i1 @i1_zeroext(i8*, i32, i16 zeroext) diff --git a/llvm/test/CodeGen/ARM/CGP/arm-cgp-casts.ll b/llvm/test/CodeGen/ARM/CGP/arm-cgp-casts.ll deleted file mode 100644 index 538f110ffd767..0000000000000 --- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-casts.ll +++ /dev/null @@ -1,2243 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NODSP --check-prefix=CHECK-NODSP-V8 -; RUN: llc -mtriple=thumbv7-linux-android %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NODSP --check-prefix=CHECK-NODSP-V7 -; RUN: llc -mtriple=thumbv7em -mcpu=cortex-m7 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DSP -; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DSP-IMM - -; Transform will fail because the trunc is not a sink. - -define i16 @dsp_trunc(i32 %arg0, i32 %arg1, i16* %gep0, i16* %gep1) { -; CHECK-NODSP-V8-LABEL: dsp_trunc: -; CHECK-NODSP-V8: @ %bb.0: @ %entry -; CHECK-NODSP-V8-NEXT: add r1, r0 -; CHECK-NODSP-V8-NEXT: ldrh r0, [r2] -; CHECK-NODSP-V8-NEXT: ldrh r2, [r3] -; CHECK-NODSP-V8-NEXT: add r0, r1 -; CHECK-NODSP-V8-NEXT: subs r1, r2, r1 -; CHECK-NODSP-V8-NEXT: uxth r3, r0 -; CHECK-NODSP-V8-NEXT: uxth r2, r1 -; CHECK-NODSP-V8-NEXT: cmp r3, r2 -; CHECK-NODSP-V8-NEXT: it lo -; CHECK-NODSP-V8-NEXT: movlo r0, r1 -; CHECK-NODSP-V8-NEXT: bx lr -; -; CHECK-NODSP-V7-LABEL: dsp_trunc: -; CHECK-NODSP-V7: @ %bb.0: @ %entry -; CHECK-NODSP-V7-NEXT: ldrh r2, [r2] -; CHECK-NODSP-V7-NEXT: add r1, r0 -; CHECK-NODSP-V7-NEXT: ldrh r3, [r3] -; CHECK-NODSP-V7-NEXT: adds r0, r2, r1 -; CHECK-NODSP-V7-NEXT: subs r1, r3, r1 -; CHECK-NODSP-V7-NEXT: uxth r3, r0 -; CHECK-NODSP-V7-NEXT: uxth r2, r1 -; CHECK-NODSP-V7-NEXT: cmp r3, r2 -; CHECK-NODSP-V7-NEXT: it lo -; CHECK-NODSP-V7-NEXT: movlo r0, r1 -; CHECK-NODSP-V7-NEXT: bx lr -; -; CHECK-DSP-LABEL: dsp_trunc: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: add r0, r1 -; CHECK-DSP-NEXT: ldrh r1, [r3] -; CHECK-DSP-NEXT: ldrh r2, [r2] -; CHECK-DSP-NEXT: subs r1, r1, r0 -; CHECK-DSP-NEXT: add r0, r2 -; CHECK-DSP-NEXT: uxth r3, r1 -; CHECK-DSP-NEXT: uxth r2, r0 -; CHECK-DSP-NEXT: cmp r2, r3 -; CHECK-DSP-NEXT: it lo -; CHECK-DSP-NEXT: movlo r0, r1 -; CHECK-DSP-NEXT: bx lr -; -; CHECK-DSP-IMM-LABEL: dsp_trunc: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: add r0, r1 -; CHECK-DSP-IMM-NEXT: movs r1, #0 -; CHECK-DSP-IMM-NEXT: uxth r0, r0 -; CHECK-DSP-IMM-NEXT: usub16 r1, r1, r0 -; CHECK-DSP-IMM-NEXT: ldrh r0, [r2] -; CHECK-DSP-IMM-NEXT: ldrh r3, [r3] -; CHECK-DSP-IMM-NEXT: usub16 r0, r0, r1 -; CHECK-DSP-IMM-NEXT: uadd16 r1, r3, r1 -; CHECK-DSP-IMM-NEXT: cmp r0, r1 -; CHECK-DSP-IMM-NEXT: it lo -; CHECK-DSP-IMM-NEXT: movlo r0, r1 -; CHECK-DSP-IMM-NEXT: bx lr 
-entry: - %add0 = add i32 %arg0, %arg1 - %conv0 = trunc i32 %add0 to i16 - %sub0 = sub i16 0, %conv0 - %load0 = load i16, i16* %gep0, align 2 - %load1 = load i16, i16* %gep1, align 2 - %sub1 = sub i16 %load0, %sub0 - %add1 = add i16 %load1, %sub0 - %cmp = icmp ult i16 %sub1, %add1 - %res = select i1 %cmp, i16 %add1, i16 %sub1 - ret i16 %res -} - -define i8 @trunc_i16_i8(i16* %ptr, i16 zeroext %arg0, i8 zeroext %arg1) { -; CHECK-LABEL: trunc_i16_i8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: ldrh r0, [r0] -; CHECK-NEXT: add r0, r1 -; CHECK-NEXT: uxtb r0, r0 -; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: it ls -; CHECK-NEXT: movls r0, r2 -; CHECK-NEXT: bx lr -entry: - %0 = load i16, i16* %ptr - %1 = add i16 %0, %arg0 - %2 = trunc i16 %1 to i8 - %3 = icmp ugt i8 %2, %arg1 - %4 = select i1 %3, i8 %2, i8 %arg1 - ret i8 %4 -} - -; The pass performs the transform, but a uxtb will still be inserted to handle -; the zext to the icmp. -define i8 @icmp_i32_zext(i8* %ptr) { -; CHECK-NODSP-V8-LABEL: icmp_i32_zext: -; CHECK-NODSP-V8: @ %bb.0: @ %entry -; CHECK-NODSP-V8-NEXT: ldrb r2, [r0], #1 -; CHECK-NODSP-V8-NEXT: subs r1, r2, #1 -; CHECK-NODSP-V8-NEXT: .p2align 2 -; CHECK-NODSP-V8-NEXT: .LBB2_1: @ %body -; CHECK-NODSP-V8-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NODSP-V8-NEXT: uxtb r3, r1 -; CHECK-NODSP-V8-NEXT: cmp r2, r3 -; CHECK-NODSP-V8-NEXT: itt ne -; CHECK-NODSP-V8-NEXT: movne r0, r1 -; CHECK-NODSP-V8-NEXT: bxne lr -; CHECK-NODSP-V8-NEXT: ldrb r1, [r0, r2] -; CHECK-NODSP-V8-NEXT: adds r2, #1 -; CHECK-NODSP-V8-NEXT: b .LBB2_1 -; -; CHECK-NODSP-V7-LABEL: icmp_i32_zext: -; CHECK-NODSP-V7: @ %bb.0: @ %entry -; CHECK-NODSP-V7-NEXT: ldrb r2, [r0], #1 -; CHECK-NODSP-V7-NEXT: subs r1, r2, #1 -; CHECK-NODSP-V7-NEXT: .LBB2_1: @ %body -; CHECK-NODSP-V7-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NODSP-V7-NEXT: uxtb r3, r1 -; CHECK-NODSP-V7-NEXT: cmp r2, r3 -; CHECK-NODSP-V7-NEXT: itt ne -; CHECK-NODSP-V7-NEXT: movne r0, r1 -; CHECK-NODSP-V7-NEXT: bxne lr -; CHECK-NODSP-V7-NEXT: ldrb r1, [r0, r2] -; CHECK-NODSP-V7-NEXT: adds r2, #1 -; CHECK-NODSP-V7-NEXT: b .LBB2_1 -; -; CHECK-DSP-LABEL: icmp_i32_zext: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: ldrb r2, [r0], #1 -; CHECK-DSP-NEXT: subs r1, r2, #1 -; CHECK-DSP-NEXT: .LBB2_1: @ %body -; CHECK-DSP-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-DSP-NEXT: uxtb r3, r1 -; CHECK-DSP-NEXT: cmp r2, r3 -; CHECK-DSP-NEXT: itt ne -; CHECK-DSP-NEXT: movne r0, r1 -; CHECK-DSP-NEXT: bxne lr -; CHECK-DSP-NEXT: ldrb r1, [r0, r2] -; CHECK-DSP-NEXT: adds r2, #1 -; CHECK-DSP-NEXT: b .LBB2_1 -; -; CHECK-DSP-IMM-LABEL: icmp_i32_zext: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: ldrb r2, [r0], #1 -; CHECK-DSP-IMM-NEXT: subs r1, r2, #1 -; CHECK-DSP-IMM-NEXT: .LBB2_1: @ %body -; CHECK-DSP-IMM-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-DSP-IMM-NEXT: uxtb r3, r1 -; CHECK-DSP-IMM-NEXT: cmp r2, r3 -; CHECK-DSP-IMM-NEXT: bne .LBB2_3 -; CHECK-DSP-IMM-NEXT: @ %bb.2: @ %if.end -; CHECK-DSP-IMM-NEXT: @ in Loop: Header=BB2_1 Depth=1 -; CHECK-DSP-IMM-NEXT: ldrb r1, [r0, r2] -; CHECK-DSP-IMM-NEXT: adds r2, #1 -; CHECK-DSP-IMM-NEXT: b .LBB2_1 -; CHECK-DSP-IMM-NEXT: .LBB2_3: @ %exit -; CHECK-DSP-IMM-NEXT: mov r0, r1 -; CHECK-DSP-IMM-NEXT: bx lr -entry: - %gep = getelementptr inbounds i8, i8* %ptr, i32 0 - %0 = load i8, i8* %gep, align 1 - %1 = sub nuw nsw i8 %0, 1 - %conv44 = zext i8 %0 to i32 - br label %preheader - -preheader: - br label %body - -body: - %2 = phi i8 [ %1, %preheader ], [ %3, %if.end ] - %si.0274 = phi i32 [ %conv44, %preheader ], [
%inc, %if.end ] - %conv51266 = zext i8 %2 to i32 - %cmp52267 = icmp eq i32 %si.0274, %conv51266 - br i1 %cmp52267, label %if.end, label %exit - -if.end: - %inc = add i32 %si.0274, 1 - %gep1 = getelementptr inbounds i8, i8* %ptr, i32 %inc - %3 = load i8, i8* %gep1, align 1 - br label %body - -exit: - ret i8 %2 -} - -; We don't handle sext -define i32 @icmp_sext_zext_store_i8_i16() { -; CHECK-NODSP-V8-LABEL: icmp_sext_zext_store_i8_i16: -; CHECK-NODSP-V8: @ %bb.0: @ %entry -; CHECK-NODSP-V8-NEXT: movw r0, :lower16:d_uch -; CHECK-NODSP-V8-NEXT: movt r0, :upper16:d_uch -; CHECK-NODSP-V8-NEXT: ldrb r1, [r0, #2] -; CHECK-NODSP-V8-NEXT: movw r0, :lower16:d_sh -; CHECK-NODSP-V8-NEXT: movt r0, :upper16:d_sh -; CHECK-NODSP-V8-NEXT: ldrsh.w r0, [r0, #4] -; CHECK-NODSP-V8-NEXT: movw r2, :lower16:sh1 -; CHECK-NODSP-V8-NEXT: subs r0, r1, r0 -; CHECK-NODSP-V8-NEXT: clz r0, r0 -; CHECK-NODSP-V8-NEXT: movt r2, :upper16:sh1 -; CHECK-NODSP-V8-NEXT: lsrs r0, r0, #5 -; CHECK-NODSP-V8-NEXT: strh r1, [r2] -; CHECK-NODSP-V8-NEXT: bx lr -; -; CHECK-NODSP-V7-LABEL: icmp_sext_zext_store_i8_i16: -; CHECK-NODSP-V7: @ %bb.0: @ %entry -; CHECK-NODSP-V7-NEXT: movw r0, :lower16:d_sh -; CHECK-NODSP-V7-NEXT: movw r1, :lower16:d_uch -; CHECK-NODSP-V7-NEXT: movt r0, :upper16:d_sh -; CHECK-NODSP-V7-NEXT: movt r1, :upper16:d_uch -; CHECK-NODSP-V7-NEXT: ldrb r1, [r1, #2] -; CHECK-NODSP-V7-NEXT: movw r2, :lower16:sh1 -; CHECK-NODSP-V7-NEXT: ldrsh.w r0, [r0, #4] -; CHECK-NODSP-V7-NEXT: movt r2, :upper16:sh1 -; CHECK-NODSP-V7-NEXT: strh r1, [r2] -; CHECK-NODSP-V7-NEXT: subs r0, r1, r0 -; CHECK-NODSP-V7-NEXT: clz r0, r0 -; CHECK-NODSP-V7-NEXT: lsrs r0, r0, #5 -; CHECK-NODSP-V7-NEXT: bx lr -; -; CHECK-DSP-LABEL: icmp_sext_zext_store_i8_i16: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: movw r0, :lower16:d_uch -; CHECK-DSP-NEXT: movw r1, :lower16:sh1 -; CHECK-DSP-NEXT: movt r0, :upper16:d_uch -; CHECK-DSP-NEXT: movt r1, :upper16:sh1 -; CHECK-DSP-NEXT: ldrb r0, [r0, #2] -; CHECK-DSP-NEXT: strh r0, [r1] -; CHECK-DSP-NEXT: movw r1, :lower16:d_sh -; CHECK-DSP-NEXT: movt r1, :upper16:d_sh -; CHECK-DSP-NEXT: ldrsh.w r1, [r1, #4] -; CHECK-DSP-NEXT: subs r0, r0, r1 -; CHECK-DSP-NEXT: clz r0, r0 -; CHECK-DSP-NEXT: lsrs r0, r0, #5 -; CHECK-DSP-NEXT: bx lr -; -; CHECK-DSP-IMM-LABEL: icmp_sext_zext_store_i8_i16: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: movw r0, :lower16:d_sh -; CHECK-DSP-IMM-NEXT: movw r1, :lower16:d_uch -; CHECK-DSP-IMM-NEXT: movt r0, :upper16:d_sh -; CHECK-DSP-IMM-NEXT: movt r1, :upper16:d_uch -; CHECK-DSP-IMM-NEXT: ldrb r1, [r1, #2] -; CHECK-DSP-IMM-NEXT: movw r2, :lower16:sh1 -; CHECK-DSP-IMM-NEXT: ldrsh.w r0, [r0, #4] -; CHECK-DSP-IMM-NEXT: movt r2, :upper16:sh1 -; CHECK-DSP-IMM-NEXT: strh r1, [r2] -; CHECK-DSP-IMM-NEXT: subs r0, r1, r0 -; CHECK-DSP-IMM-NEXT: clz r0, r0 -; CHECK-DSP-IMM-NEXT: lsrs r0, r0, #5 -; CHECK-DSP-IMM-NEXT: bx lr -entry: - %0 = load i8, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @d_uch, i32 0, i32 2), align 1 - %conv = zext i8 %0 to i16 - store i16 %conv, i16* @sh1, align 2 - %conv1 = zext i8 %0 to i32 - %1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @d_sh, i32 0, i32 2), align 2 - %conv2 = sext i16 %1 to i32 - %cmp = icmp eq i32 %conv1, %conv2 - %conv3 = zext i1 %cmp to i32 - ret i32 %conv3 -} - -define i1 @or_icmp_ugt(i32 %arg, i8* %ptr) { -; CHECK-NODSP-V8-LABEL: or_icmp_ugt: -; CHECK-NODSP-V8: @ %bb.0: @ %entry -; CHECK-NODSP-V8-NEXT: ldrb r1, [r1] -; CHECK-NODSP-V8-NEXT: adds r2, r1, #3 -; CHECK-NODSP-V8-NEXT: subs.w r0, r0, r2, lsl #1 -;
CHECK-NODSP-V8-NEXT: it ne -; CHECK-NODSP-V8-NEXT: movne r0, #1 -; CHECK-NODSP-V8-NEXT: subs r1, #1 -; CHECK-NODSP-V8-NEXT: movs r2, #0 -; CHECK-NODSP-V8-NEXT: cmp r1, #3 -; CHECK-NODSP-V8-NEXT: it hi -; CHECK-NODSP-V8-NEXT: movhi r2, #1 -; CHECK-NODSP-V8-NEXT: orrs r0, r2 -; CHECK-NODSP-V8-NEXT: bx lr -; -; CHECK-NODSP-V7-LABEL: or_icmp_ugt: -; CHECK-NODSP-V7: @ %bb.0: @ %entry -; CHECK-NODSP-V7-NEXT: ldrb r1, [r1] -; CHECK-NODSP-V7-NEXT: adds r2, r1, #3 -; CHECK-NODSP-V7-NEXT: subs r1, #1 -; CHECK-NODSP-V7-NEXT: subs.w r0, r0, r2, lsl #1 -; CHECK-NODSP-V7-NEXT: mov.w r2, #0 -; CHECK-NODSP-V7-NEXT: it ne -; CHECK-NODSP-V7-NEXT: movne r0, #1 -; CHECK-NODSP-V7-NEXT: cmp r1, #3 -; CHECK-NODSP-V7-NEXT: it hi -; CHECK-NODSP-V7-NEXT: movhi r2, #1 -; CHECK-NODSP-V7-NEXT: orrs r0, r2 -; CHECK-NODSP-V7-NEXT: bx lr -; -; CHECK-DSP-LABEL: or_icmp_ugt: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: ldrb r1, [r1] -; CHECK-DSP-NEXT: adds r2, r1, #3 -; CHECK-DSP-NEXT: subs r1, #1 -; CHECK-DSP-NEXT: subs.w r0, r0, r2, lsl #1 -; CHECK-DSP-NEXT: mov.w r2, #0 -; CHECK-DSP-NEXT: it ne -; CHECK-DSP-NEXT: movne r0, #1 -; CHECK-DSP-NEXT: cmp r1, #3 -; CHECK-DSP-NEXT: it hi -; CHECK-DSP-NEXT: movhi r2, #1 -; CHECK-DSP-NEXT: orrs r0, r2 -; CHECK-DSP-NEXT: bx lr -; -; CHECK-DSP-IMM-LABEL: or_icmp_ugt: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: ldrb r1, [r1] -; CHECK-DSP-IMM-NEXT: adds r2, r1, #3 -; CHECK-DSP-IMM-NEXT: subs.w r0, r0, r2, lsl #1 -; CHECK-DSP-IMM-NEXT: it ne -; CHECK-DSP-IMM-NEXT: movne r0, #1 -; CHECK-DSP-IMM-NEXT: subs r1, #1 -; CHECK-DSP-IMM-NEXT: movs r2, #0 -; CHECK-DSP-IMM-NEXT: cmp r1, #3 -; CHECK-DSP-IMM-NEXT: it hi -; CHECK-DSP-IMM-NEXT: movhi r2, #1 -; CHECK-DSP-IMM-NEXT: orrs r0, r2 -; CHECK-DSP-IMM-NEXT: bx lr -entry: - %0 = load i8, i8* %ptr - %1 = zext i8 %0 to i32 - %mul = shl nuw nsw i32 %1, 1 - %add0 = add nuw nsw i32 %mul, 6 - %cmp0 = icmp ne i32 %arg, %add0 - %add1 = add i8 %0, -1 - %cmp1 = icmp ugt i8 %add1, 3 - %or = or i1 %cmp0, %cmp1 - ret i1 %or -} - -; We currently only handle truncs as sinks, so a uxt will still be needed for -; the icmp ugt instruction. 
-define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) { -; CHECK-NODSP-V8-LABEL: urem_trunc_icmps: -; CHECK-NODSP-V8: @ %bb.0: @ %entry -; CHECK-NODSP-V8-NEXT: ldr r0, [r0] -; CHECK-NODSP-V8-NEXT: ldrh r0, [r0] -; CHECK-NODSP-V8-NEXT: cbz r0, .LBB5_3 -; CHECK-NODSP-V8-NEXT: @ %bb.1: @ %cond.false.i -; CHECK-NODSP-V8-NEXT: movs r3, #5 -; CHECK-NODSP-V8-NEXT: udiv r3, r3, r0 -; CHECK-NODSP-V8-NEXT: muls r0, r3, r0 -; CHECK-NODSP-V8-NEXT: rsb.w r0, r0, #5 -; CHECK-NODSP-V8-NEXT: .p2align 2 -; CHECK-NODSP-V8-NEXT: .LBB5_2: @ %body -; CHECK-NODSP-V8-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NODSP-V8-NEXT: uxtb r3, r0 -; CHECK-NODSP-V8-NEXT: cmp r3, #7 -; CHECK-NODSP-V8-NEXT: mov.w r3, #0 -; CHECK-NODSP-V8-NEXT: it hi -; CHECK-NODSP-V8-NEXT: movhi r3, #1 -; CHECK-NODSP-V8-NEXT: str r3, [r1] -; CHECK-NODSP-V8-NEXT: ldr r3, [r2] -; CHECK-NODSP-V8-NEXT: cmp r3, #0 -; CHECK-NODSP-V8-NEXT: it ne -; CHECK-NODSP-V8-NEXT: bxne lr -; CHECK-NODSP-V8-NEXT: adds r0, #1 -; CHECK-NODSP-V8-NEXT: b .LBB5_2 -; CHECK-NODSP-V8-NEXT: .LBB5_3: @ %exit -; CHECK-NODSP-V8-NEXT: bx lr -; -; CHECK-NODSP-V7-LABEL: urem_trunc_icmps: -; CHECK-NODSP-V7: @ %bb.0: @ %entry -; CHECK-NODSP-V7-NEXT: .save {r4, r5, r7, lr} -; CHECK-NODSP-V7-NEXT: push {r4, r5, r7, lr} -; CHECK-NODSP-V7-NEXT: ldr r0, [r0] -; CHECK-NODSP-V7-NEXT: mov r5, r1 -; CHECK-NODSP-V7-NEXT: ldrh r1, [r0] -; CHECK-NODSP-V7-NEXT: cbz r1, .LBB5_4 -; CHECK-NODSP-V7-NEXT: @ %bb.1: @ %cond.false.i -; CHECK-NODSP-V7-NEXT: movs r0, #5 -; CHECK-NODSP-V7-NEXT: mov r4, r2 -; CHECK-NODSP-V7-NEXT: bl __aeabi_uidivmod -; CHECK-NODSP-V7-NEXT: .LBB5_2: @ %body -; CHECK-NODSP-V7-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NODSP-V7-NEXT: uxtb r0, r1 -; CHECK-NODSP-V7-NEXT: cmp r0, #7 -; CHECK-NODSP-V7-NEXT: mov.w r0, #0 -; CHECK-NODSP-V7-NEXT: it hi -; CHECK-NODSP-V7-NEXT: movhi r0, #1 -; CHECK-NODSP-V7-NEXT: str r0, [r5] -; CHECK-NODSP-V7-NEXT: ldr r0, [r4] -; CHECK-NODSP-V7-NEXT: cbnz r0, .LBB5_4 -; CHECK-NODSP-V7-NEXT: @ %bb.3: @ %for.inc -; CHECK-NODSP-V7-NEXT: @ in Loop: Header=BB5_2 Depth=1 -; CHECK-NODSP-V7-NEXT: adds r1, #1 -; CHECK-NODSP-V7-NEXT: b .LBB5_2 -; CHECK-NODSP-V7-NEXT: .LBB5_4: @ %exit -; CHECK-NODSP-V7-NEXT: pop {r4, r5, r7, pc} -; -; CHECK-DSP-LABEL: urem_trunc_icmps: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: ldr r0, [r0] -; CHECK-DSP-NEXT: ldrh r0, [r0] -; CHECK-DSP-NEXT: cbz r0, .LBB5_3 -; CHECK-DSP-NEXT: @ %bb.1: @ %cond.false.i -; CHECK-DSP-NEXT: movs r3, #5 -; CHECK-DSP-NEXT: udiv r3, r3, r0 -; CHECK-DSP-NEXT: muls r0, r3, r0 -; CHECK-DSP-NEXT: rsb.w r0, r0, #5 -; CHECK-DSP-NEXT: .LBB5_2: @ %body -; CHECK-DSP-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-DSP-NEXT: uxtb r3, r0 -; CHECK-DSP-NEXT: cmp r3, #7 -; CHECK-DSP-NEXT: mov.w r3, #0 -; CHECK-DSP-NEXT: it hi -; CHECK-DSP-NEXT: movhi r3, #1 -; CHECK-DSP-NEXT: str r3, [r1] -; CHECK-DSP-NEXT: ldr r3, [r2] -; CHECK-DSP-NEXT: cmp r3, #0 -; CHECK-DSP-NEXT: it ne -; CHECK-DSP-NEXT: bxne lr -; CHECK-DSP-NEXT: adds r0, #1 -; CHECK-DSP-NEXT: b .LBB5_2 -; CHECK-DSP-NEXT: .LBB5_3: @ %exit -; CHECK-DSP-NEXT: bx lr -; -; CHECK-DSP-IMM-LABEL: urem_trunc_icmps: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: ldr r0, [r0] -; CHECK-DSP-IMM-NEXT: ldrh r0, [r0] -; CHECK-DSP-IMM-NEXT: cbz r0, .LBB5_4 -; CHECK-DSP-IMM-NEXT: @ %bb.1: @ %cond.false.i -; CHECK-DSP-IMM-NEXT: movs r3, #5 -; CHECK-DSP-IMM-NEXT: udiv r3, r3, r0 -; CHECK-DSP-IMM-NEXT: muls r0, r3, r0 -; CHECK-DSP-IMM-NEXT: rsb.w r0, r0, #5 -; CHECK-DSP-IMM-NEXT: .LBB5_2: @ %body -; CHECK-DSP-IMM-NEXT: @ =>This 
Inner Loop Header: Depth=1 -; CHECK-DSP-IMM-NEXT: uxtb r3, r0 -; CHECK-DSP-IMM-NEXT: cmp r3, #7 -; CHECK-DSP-IMM-NEXT: mov.w r3, #0 -; CHECK-DSP-IMM-NEXT: it hi -; CHECK-DSP-IMM-NEXT: movhi r3, #1 -; CHECK-DSP-IMM-NEXT: str r3, [r1] -; CHECK-DSP-IMM-NEXT: ldr r3, [r2] -; CHECK-DSP-IMM-NEXT: cbnz r3, .LBB5_4 -; CHECK-DSP-IMM-NEXT: @ %bb.3: @ %for.inc -; CHECK-DSP-IMM-NEXT: @ in Loop: Header=BB5_2 Depth=1 -; CHECK-DSP-IMM-NEXT: adds r0, #1 -; CHECK-DSP-IMM-NEXT: b .LBB5_2 -; CHECK-DSP-IMM-NEXT: .LBB5_4: @ %exit -; CHECK-DSP-IMM-NEXT: bx lr -entry: - %ptr = load i16*, i16** %in, align 4 - %ld = load i16, i16* %ptr, align 2 - %cmp.i = icmp eq i16 %ld, 0 - br i1 %cmp.i, label %exit, label %cond.false.i - -cond.false.i: - %rem = urem i16 5, %ld - %extract.t = trunc i16 %rem to i8 - br label %body - -body: - %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ] - %cmp = icmp ugt i8 %cond.in.i.off0, 7 - %conv5 = zext i1 %cmp to i32 - store i32 %conv5, i32* %g, align 4 - %.pr = load i32, i32* %k, align 4 - %tobool13150 = icmp eq i32 %.pr, 0 - br i1 %tobool13150, label %for.inc, label %exit - -for.inc: - %add = add nuw i8 %cond.in.i.off0, 1 - br label %body - -exit: - ret void -} - -; Check that %exp requires uxth in all cases, and will also be required to -; promote %1 for the call - unless we can generate a uadd16. -define i32 @zext_load_sink_call(i16* %ptr, i16 %exp) { -; CHECK-NODSP-LABEL: zext_load_sink_call: -; CHECK-NODSP: @ %bb.0: @ %entry -; CHECK-NODSP-NEXT: ldrh r0, [r0] -; CHECK-NODSP-NEXT: uxth r2, r1 -; CHECK-NODSP-NEXT: cmp r0, r2 -; CHECK-NODSP-NEXT: itt eq -; CHECK-NODSP-NEXT: moveq r0, #0 -; CHECK-NODSP-NEXT: bxeq lr -; CHECK-NODSP-NEXT: adds r1, #3 -; CHECK-NODSP-NEXT: uxth r1, r1 -; CHECK-NODSP-NEXT: b dummy -; -; CHECK-DSP-LABEL: zext_load_sink_call: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: ldrh r0, [r0] -; CHECK-DSP-NEXT: uxth r2, r1 -; CHECK-DSP-NEXT: cmp r0, r2 -; CHECK-DSP-NEXT: itt eq -; CHECK-DSP-NEXT: moveq r0, #0 -; CHECK-DSP-NEXT: bxeq lr -; CHECK-DSP-NEXT: adds r1, #3 -; CHECK-DSP-NEXT: uxth r1, r1 -; CHECK-DSP-NEXT: b dummy -; -; CHECK-DSP-IMM-LABEL: zext_load_sink_call: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: uxth r2, r1 -; CHECK-DSP-IMM-NEXT: ldrh r0, [r0] -; CHECK-DSP-IMM-NEXT: movs r1, #3 -; CHECK-DSP-IMM-NEXT: uadd16 r1, r2, r1 -; CHECK-DSP-IMM-NEXT: cmp r0, r2 -; CHECK-DSP-IMM-NEXT: bne .LBB6_2 -; CHECK-DSP-IMM-NEXT: @ %bb.1: @ %exit -; CHECK-DSP-IMM-NEXT: movs r0, #0 -; CHECK-DSP-IMM-NEXT: bx lr -; CHECK-DSP-IMM-NEXT: .LBB6_2: @ %if.then -; CHECK-DSP-IMM-NEXT: b dummy -entry: - %0 = load i16, i16* %ptr, align 4 - %1 = add i16 %exp, 3 - %cmp = icmp eq i16 %0, %exp - br i1 %cmp, label %exit, label %if.then - -if.then: - %conv0 = zext i16 %0 to i32 - %conv1 = zext i16 %1 to i32 - %call = tail call arm_aapcs_vfpcc i32 @dummy(i32 %conv0, i32 %conv1) - br label %exit - -exit: - %exitval = phi i32 [ %call, %if.then ], [ 0, %entry ] - ret i32 %exitval -} - -define i16 @bitcast_i16(i16 zeroext %arg0, i16 zeroext %arg1) { -; CHECK-NODSP-LABEL: bitcast_i16: -; CHECK-NODSP: @ %bb.0: @ %entry -; CHECK-NODSP-NEXT: adds r0, #1 -; CHECK-NODSP-NEXT: movw r2, #12345 -; CHECK-NODSP-NEXT: cmp r0, r2 -; CHECK-NODSP-NEXT: it hi -; CHECK-NODSP-NEXT: movwhi r1, #32657 -; CHECK-NODSP-NEXT: mov r0, r1 -; CHECK-NODSP-NEXT: bx lr -; -; CHECK-DSP-LABEL: bitcast_i16: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: adds r0, #1 -; CHECK-DSP-NEXT: movw r2, #12345 -; CHECK-DSP-NEXT: cmp r0, r2 -; CHECK-DSP-NEXT: it hi -; CHECK-DSP-NEXT: 
movwhi r1, #32657 -; CHECK-DSP-NEXT: mov r0, r1 -; CHECK-DSP-NEXT: bx lr -; -; CHECK-DSP-IMM-LABEL: bitcast_i16: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: adds r2, r0, #1 -; CHECK-DSP-IMM-NEXT: movw r0, #32657 -; CHECK-DSP-IMM-NEXT: movw r3, #12345 -; CHECK-DSP-IMM-NEXT: cmp r2, r3 -; CHECK-DSP-IMM-NEXT: it ls -; CHECK-DSP-IMM-NEXT: movls r0, r1 -; CHECK-DSP-IMM-NEXT: bx lr -entry: - %cast = bitcast i16 12345 to i16 - %add = add nuw i16 %arg0, 1 - %cmp = icmp ule i16 %add, %cast - %res = select i1 %cmp, i16 %arg1, i16 32657 - ret i16 %res -} - -define i8 @bitcast_i8(i8 zeroext %arg0, i8 zeroext %arg1) { -; CHECK-LABEL: bitcast_i8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r2, #127 -; CHECK-NEXT: cmp.w r1, r0, lsl #1 -; CHECK-NEXT: it ls -; CHECK-NEXT: movls r2, #127 -; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: bx lr -entry: - %cast = bitcast i8 127 to i8 - %mul = shl nuw i8 %arg0, 1 - %cmp = icmp uge i8 %mul, %arg1 - %res = select i1 %cmp, i8 %cast, i8 128 - ret i8 %res -} - -define i16 @bitcast_i16_minus(i16 zeroext %arg0, i16 zeroext %arg1) { -; CHECK-NODSP-LABEL: bitcast_i16_minus: -; CHECK-NODSP: @ %bb.0: @ %entry -; CHECK-NODSP-NEXT: eor r2, r0, #7 -; CHECK-NODSP-NEXT: movw r0, #32657 -; CHECK-NODSP-NEXT: cmp r2, r1 -; CHECK-NODSP-NEXT: itt eq -; CHECK-NODSP-NEXT: movweq r0, #53191 -; CHECK-NODSP-NEXT: movteq r0, #65535 -; CHECK-NODSP-NEXT: bx lr -; -; CHECK-DSP-LABEL: bitcast_i16_minus: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: eor r2, r0, #7 -; CHECK-DSP-NEXT: movw r0, #32657 -; CHECK-DSP-NEXT: cmp r2, r1 -; CHECK-DSP-NEXT: itt eq -; CHECK-DSP-NEXT: movweq r0, #53191 -; CHECK-DSP-NEXT: movteq r0, #65535 -; CHECK-DSP-NEXT: bx lr -; -; CHECK-DSP-IMM-LABEL: bitcast_i16_minus: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: eor r2, r0, #7 -; CHECK-DSP-IMM-NEXT: movw r0, #32657 -; CHECK-DSP-IMM-NEXT: cmp r2, r1 -; CHECK-DSP-IMM-NEXT: it eq -; CHECK-DSP-IMM-NEXT: movweq r0, #53191 -; CHECK-DSP-IMM-NEXT: it eq -; CHECK-DSP-IMM-NEXT: movteq r0, #65535 -; CHECK-DSP-IMM-NEXT: bx lr -entry: - %cast = bitcast i16 -12345 to i16 - %xor = xor i16 %arg0, 7 - %cmp = icmp eq i16 %xor, %arg1 - %res = select i1 %cmp, i16 %cast, i16 32657 - ret i16 %res -} - -define i8 @bitcast_i8_minus(i8 zeroext %arg0, i8 zeroext %arg1) { -; CHECK-LABEL: bitcast_i8_minus: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: and r2, r0, #3 -; CHECK-NEXT: mvn r0, #127 -; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r0, #126 -; CHECK-NEXT: bx lr -entry: - %cast = bitcast i8 -127 to i8 - %and = and i8 %arg0, 3 - %cmp = icmp ne i8 %and, %arg1 - %res = select i1 %cmp, i8 %cast, i8 128 - ret i8 %res -} - -declare i32 @dummy(i32, i32) - -@d_uch = hidden local_unnamed_addr global [16 x i8] zeroinitializer, align 1 -@sh1 = hidden local_unnamed_addr global i16 0, align 2 -@d_sh = hidden local_unnamed_addr global [16 x i16] zeroinitializer, align 2 - -define i8* @two_stage_zext_trunc_mix(i32* %this, i32 %__pos1, i32 %__n1, i32** %__str, i32 %__pos2, i32 %__n2) { -; CHECK-NODSP-V8-LABEL: two_stage_zext_trunc_mix: -; CHECK-NODSP-V8: @ %bb.0: @ %entry -; CHECK-NODSP-V8-NEXT: ldr.w r12, [sp] -; CHECK-NODSP-V8-NEXT: ldrb r2, [r0] -; CHECK-NODSP-V8-NEXT: add.w r0, r3, r12 -; CHECK-NODSP-V8-NEXT: lsls r2, r2, #31 -; CHECK-NODSP-V8-NEXT: it eq -; CHECK-NODSP-V8-NEXT: addeq r0, r3, r1 -; CHECK-NODSP-V8-NEXT: bx lr -; -; CHECK-NODSP-V7-LABEL: two_stage_zext_trunc_mix: -; CHECK-NODSP-V7: @ %bb.0: @ %entry -; CHECK-NODSP-V7-NEXT: ldrb r2, [r0] -; CHECK-NODSP-V7-NEXT: ldr.w r12, [sp] -; 
CHECK-NODSP-V7-NEXT: add.w r0, r3, r12 -; CHECK-NODSP-V7-NEXT: lsls r2, r2, #31 -; CHECK-NODSP-V7-NEXT: it eq -; CHECK-NODSP-V7-NEXT: addeq r0, r3, r1 -; CHECK-NODSP-V7-NEXT: bx lr -; -; CHECK-DSP-LABEL: two_stage_zext_trunc_mix: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: ldr r2, [sp] -; CHECK-DSP-NEXT: ldrb r0, [r0] -; CHECK-DSP-NEXT: add r2, r3 -; CHECK-DSP-NEXT: lsls r0, r0, #31 -; CHECK-DSP-NEXT: it eq -; CHECK-DSP-NEXT: addeq r2, r3, r1 -; CHECK-DSP-NEXT: mov r0, r2 -; CHECK-DSP-NEXT: bx lr -; -; CHECK-DSP-IMM-LABEL: two_stage_zext_trunc_mix: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: ldr.w r12, [sp] -; CHECK-DSP-IMM-NEXT: ldrb r2, [r0] -; CHECK-DSP-IMM-NEXT: adds r0, r3, r1 -; CHECK-DSP-IMM-NEXT: add r12, r3 -; CHECK-DSP-IMM-NEXT: lsls r1, r2, #31 -; CHECK-DSP-IMM-NEXT: it ne -; CHECK-DSP-IMM-NEXT: movne r0, r12 -; CHECK-DSP-IMM-NEXT: bx lr -entry: - %__size_.i.i.i.i = bitcast i32** %__str to i8* - %0 = load i8, i8* %__size_.i.i.i.i, align 4 - %1 = and i8 %0, 1 - %tobool.i.i.i.i = icmp eq i8 %1, 0 - %__size_.i5.i.i = getelementptr inbounds i32*, i32** %__str, i32 %__n1 - %cast = bitcast i32** %__size_.i5.i.i to i32* - %2 = load i32, i32* %cast, align 4 - %3 = lshr i8 %0, 1 - %4 = zext i8 %3 to i32 - %cond.i.i = select i1 %tobool.i.i.i.i, i32 %4, i32 %2 - %__size_.i.i.i.i.i = bitcast i32* %this to i8* - %5 = load i8, i8* %__size_.i.i.i.i.i, align 4 - %6 = and i8 %5, 1 - %tobool.i.i.i.i.i = icmp eq i8 %6, 0 - %7 = getelementptr inbounds i8, i8* %__size_.i.i.i.i, i32 %__pos1 - %8 = getelementptr inbounds i8, i8* %__size_.i.i.i.i, i32 %__pos2 - %res = select i1 %tobool.i.i.i.i.i, i8* %7, i8* %8 - ret i8* %res -} - -define i8 @search_through_zext_1(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c) { -; CHECK-NODSP-V8-LABEL: search_through_zext_1: -; CHECK-NODSP-V8: @ %bb.0: @ %entry -; CHECK-NODSP-V8-NEXT: subs r3, r1, r0 -; CHECK-NODSP-V8-NEXT: add.w r12, r0, r1 -; CHECK-NODSP-V8-NEXT: cmp r3, r2 -; CHECK-NODSP-V8-NEXT: it ls -; CHECK-NODSP-V8-NEXT: movls r0, r1 -; CHECK-NODSP-V8-NEXT: cmp r12, r2 -; CHECK-NODSP-V8-NEXT: it hs -; CHECK-NODSP-V8-NEXT: movhs r0, #0 -; CHECK-NODSP-V8-NEXT: bx lr -; -; CHECK-NODSP-V7-LABEL: search_through_zext_1: -; CHECK-NODSP-V7: @ %bb.0: @ %entry -; CHECK-NODSP-V7-NEXT: subs r3, r1, r0 -; CHECK-NODSP-V7-NEXT: cmp r3, r2 -; CHECK-NODSP-V7-NEXT: mov r3, r1 -; CHECK-NODSP-V7-NEXT: it hi -; CHECK-NODSP-V7-NEXT: movhi r3, r0 -; CHECK-NODSP-V7-NEXT: add r0, r1 -; CHECK-NODSP-V7-NEXT: cmp r0, r2 -; CHECK-NODSP-V7-NEXT: it hs -; CHECK-NODSP-V7-NEXT: movhs r3, #0 -; CHECK-NODSP-V7-NEXT: mov r0, r3 -; CHECK-NODSP-V7-NEXT: bx lr -; -; CHECK-DSP-LABEL: search_through_zext_1: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: subs r3, r1, r0 -; CHECK-DSP-NEXT: cmp r3, r2 -; CHECK-DSP-NEXT: mov r3, r1 -; CHECK-DSP-NEXT: it hi -; CHECK-DSP-NEXT: movhi r3, r0 -; CHECK-DSP-NEXT: add r0, r1 -; CHECK-DSP-NEXT: cmp r0, r2 -; CHECK-DSP-NEXT: it hs -; CHECK-DSP-NEXT: movhs r3, #0 -; CHECK-DSP-NEXT: mov r0, r3 -; CHECK-DSP-NEXT: bx lr -; -; CHECK-DSP-IMM-LABEL: search_through_zext_1: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: subs r3, r1, r0 -; CHECK-DSP-IMM-NEXT: cmp r3, r2 -; CHECK-DSP-IMM-NEXT: mov r3, r1 -; CHECK-DSP-IMM-NEXT: it hi -; CHECK-DSP-IMM-NEXT: movhi r3, r0 -; CHECK-DSP-IMM-NEXT: add r1, r0 -; CHECK-DSP-IMM-NEXT: movs r0, #0 -; CHECK-DSP-IMM-NEXT: cmp r1, r2 -; CHECK-DSP-IMM-NEXT: it lo -; CHECK-DSP-IMM-NEXT: movlo r0, r3 -; CHECK-DSP-IMM-NEXT: bx lr -entry: - %add = add nuw i8 %a, %b - %conv = zext i8 %add to i16 - %cmp = icmp 
ult i16 %conv, %c - br i1 %cmp, label %if.then, label %if.end - -if.then: - %sub = sub nuw i8 %b, %a - %conv2 = zext i8 %sub to i16 - %cmp2 = icmp ugt i16 %conv2, %c - %res = select i1 %cmp2, i8 %a, i8 %b - br label %if.end - -if.end: - %retval = phi i8 [ 0, %entry ], [ %res, %if.then ] - ret i8 %retval -} - -; TODO: We should be able to remove the uxtb here. The transform fails because -; the icmp ugt uses an i32, which is too large... but this doesn't matter -; because it won't be writing a large value to a register as a result. -define i8 @search_through_zext_2(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c, i32 %d) { -; CHECK-NODSP-V8-LABEL: search_through_zext_2: -; CHECK-NODSP-V8: @ %bb.0: @ %entry -; CHECK-NODSP-V8-NEXT: push {r7, lr} -; CHECK-NODSP-V8-NEXT: sub.w lr, r1, r0 -; CHECK-NODSP-V8-NEXT: add.w r12, r0, r1 -; CHECK-NODSP-V8-NEXT: uxtb.w lr, lr -; CHECK-NODSP-V8-NEXT: uxtb.w r12, r12 -; CHECK-NODSP-V8-NEXT: cmp lr, r3 -; CHECK-NODSP-V8-NEXT: it ls -; CHECK-NODSP-V8-NEXT: movls r0, r1 -; CHECK-NODSP-V8-NEXT: cmp r12, r2 -; CHECK-NODSP-V8-NEXT: it hs -; CHECK-NODSP-V8-NEXT: movhs r0, #0 -; CHECK-NODSP-V8-NEXT: pop {r7, pc} -; -; CHECK-NODSP-V7-LABEL: search_through_zext_2: -; CHECK-NODSP-V7: @ %bb.0: @ %entry -; CHECK-NODSP-V7-NEXT: sub.w r12, r1, r0 -; CHECK-NODSP-V7-NEXT: uxtb.w r12, r12 -; CHECK-NODSP-V7-NEXT: cmp r12, r3 -; CHECK-NODSP-V7-NEXT: mov r3, r1 -; CHECK-NODSP-V7-NEXT: it hi -; CHECK-NODSP-V7-NEXT: movhi r3, r0 -; CHECK-NODSP-V7-NEXT: add r0, r1 -; CHECK-NODSP-V7-NEXT: uxtb r0, r0 -; CHECK-NODSP-V7-NEXT: cmp r0, r2 -; CHECK-NODSP-V7-NEXT: it hs -; CHECK-NODSP-V7-NEXT: movhs r3, #0 -; CHECK-NODSP-V7-NEXT: mov r0, r3 -; CHECK-NODSP-V7-NEXT: bx lr -; -; CHECK-DSP-LABEL: search_through_zext_2: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: sub.w r12, r1, r0 -; CHECK-DSP-NEXT: uxtb.w r12, r12 -; CHECK-DSP-NEXT: cmp r12, r3 -; CHECK-DSP-NEXT: mov r3, r1 -; CHECK-DSP-NEXT: it hi -; CHECK-DSP-NEXT: movhi r3, r0 -; CHECK-DSP-NEXT: add r0, r1 -; CHECK-DSP-NEXT: uxtb r0, r0 -; CHECK-DSP-NEXT: cmp r0, r2 -; CHECK-DSP-NEXT: it hs -; CHECK-DSP-NEXT: movhs r3, #0 -; CHECK-DSP-NEXT: mov r0, r3 -; CHECK-DSP-NEXT: bx lr -; -; CHECK-DSP-IMM-LABEL: search_through_zext_2: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: sub.w r12, r1, r0 -; CHECK-DSP-IMM-NEXT: uxtb.w r12, r12 -; CHECK-DSP-IMM-NEXT: cmp r12, r3 -; CHECK-DSP-IMM-NEXT: mov r3, r1 -; CHECK-DSP-IMM-NEXT: it hi -; CHECK-DSP-IMM-NEXT: movhi r3, r0 -; CHECK-DSP-IMM-NEXT: add r0, r1 -; CHECK-DSP-IMM-NEXT: uxtb r1, r0 -; CHECK-DSP-IMM-NEXT: movs r0, #0 -; CHECK-DSP-IMM-NEXT: cmp r1, r2 -; CHECK-DSP-IMM-NEXT: it lo -; CHECK-DSP-IMM-NEXT: movlo r0, r3 -; CHECK-DSP-IMM-NEXT: bx lr -entry: - %add = add nuw i8 %a, %b - %conv = zext i8 %add to i16 - %cmp = icmp ult i16 %conv, %c - br i1 %cmp, label %if.then, label %if.end - -if.then: - %sub = sub nuw i8 %b, %a - %conv2 = zext i8 %sub to i32 - %cmp2 = icmp ugt i32 %conv2, %d - %res = select i1 %cmp2, i8 %a, i8 %b - br label %if.end - -if.end: - %retval = phi i8 [ 0, %entry ], [ %res, %if.then ] - ret i8 %retval -} - -; TODO: We should be able to remove the uxtb here as all the calculations are -; performed on i8s. The promotion of i8 to i16 and then the later truncation -; results in the uxtb. 
-define i8 @search_through_zext_3(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c, i32 %d) { -; CHECK-NODSP-LABEL: search_through_zext_3: -; CHECK-NODSP: @ %bb.0: @ %entry -; CHECK-NODSP-NEXT: add.w r12, r0, r1 -; CHECK-NODSP-NEXT: uxtb.w r12, r12 -; CHECK-NODSP-NEXT: cmp r12, r2 -; CHECK-NODSP-NEXT: itt hs -; CHECK-NODSP-NEXT: movhs r0, #0 -; CHECK-NODSP-NEXT: bxhs lr -; CHECK-NODSP-NEXT: sub.w r2, r1, r12 -; CHECK-NODSP-NEXT: uxtb r2, r2 -; CHECK-NODSP-NEXT: cmp r2, r3 -; CHECK-NODSP-NEXT: it ls -; CHECK-NODSP-NEXT: movls r0, r1 -; CHECK-NODSP-NEXT: bx lr -; -; CHECK-DSP-LABEL: search_through_zext_3: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: add.w r12, r0, r1 -; CHECK-DSP-NEXT: uxtb.w r12, r12 -; CHECK-DSP-NEXT: cmp r12, r2 -; CHECK-DSP-NEXT: itt hs -; CHECK-DSP-NEXT: movhs r0, #0 -; CHECK-DSP-NEXT: bxhs lr -; CHECK-DSP-NEXT: sub.w r2, r1, r12 -; CHECK-DSP-NEXT: uxtb r2, r2 -; CHECK-DSP-NEXT: cmp r2, r3 -; CHECK-DSP-NEXT: it ls -; CHECK-DSP-NEXT: movls r0, r1 -; CHECK-DSP-NEXT: bx lr -; -; CHECK-DSP-IMM-LABEL: search_through_zext_3: -; CHECK-DSP-IMM: @ %bb.0: @ %entry -; CHECK-DSP-IMM-NEXT: add.w r12, r0, r1 -; CHECK-DSP-IMM-NEXT: uxtb.w r12, r12 -; CHECK-DSP-IMM-NEXT: cmp r12, r2 -; CHECK-DSP-IMM-NEXT: bhs .LBB14_2 -; CHECK-DSP-IMM-NEXT: @ %bb.1: @ %if.then -; CHECK-DSP-IMM-NEXT: sub.w r2, r1, r12 -; CHECK-DSP-IMM-NEXT: uxtb r2, r2 -; CHECK-DSP-IMM-NEXT: cmp r2, r3 -; CHECK-DSP-IMM-NEXT: it ls -; CHECK-DSP-IMM-NEXT: movls r0, r1 -; CHECK-DSP-IMM-NEXT: bx lr -; CHECK-DSP-IMM-NEXT: .LBB14_2: -; CHECK-DSP-IMM-NEXT: movs r0, #0 -; CHECK-DSP-IMM-NEXT: bx lr -entry: - %add = add nuw i8 %a, %b - %conv = zext i8 %add to i16 - %cmp = icmp ult i16 %conv, %c - br i1 %cmp, label %if.then, label %if.end - -if.then: - %trunc = trunc i16 %conv to i8 - %sub = sub nuw i8 %b, %trunc - %conv2 = zext i8 %sub to i32 - %cmp2 = icmp ugt i32 %conv2, %d - %res = select i1 %cmp2, i8 %a, i8 %b - br label %if.end - -if.end: - %retval = phi i8 [ 0, %entry ], [ %res, %if.then ] - ret i8 %retval -} - -; TODO: We should be able to remove the uxt that gets introduced for %conv2 -define i8 @search_through_zext_cmp(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c) { -; CHECK-NODSP-V8-LABEL: search_through_zext_cmp: -; CHECK-NODSP-V8: @ %bb.0: @ %entry -; CHECK-NODSP-V8-NEXT: subs r3, r1, r0 -; CHECK-NODSP-V8-NEXT: subs.w r12, r1, r0 -; CHECK-NODSP-V8-NEXT: uxtb r3, r3 -; CHECK-NODSP-V8-NEXT: it ne -; CHECK-NODSP-V8-NEXT: movne.w r12, #1 -; CHECK-NODSP-V8-NEXT: cmp r3, r2 -; CHECK-NODSP-V8-NEXT: it ls -; CHECK-NODSP-V8-NEXT: movls r0, r1 -; CHECK-NODSP-V8-NEXT: cmp r12, r2 -; CHECK-NODSP-V8-NEXT: it hs -; CHECK-NODSP-V8-NEXT: movhs r0, #0 -; CHECK-NODSP-V8-NEXT: bx lr -; -; CHECK-NODSP-V7-LABEL: search_through_zext_cmp: -; CHECK-NODSP-V7: @ %bb.0: @ %entry -; CHECK-NODSP-V7-NEXT: subs r3, r1, r0 -; CHECK-NODSP-V7-NEXT: subs.w r12, r1, r0 -; CHECK-NODSP-V7-NEXT: it ne -; CHECK-NODSP-V7-NEXT: movne.w r12, #1 -; CHECK-NODSP-V7-NEXT: uxtb r3, r3 -; CHECK-NODSP-V7-NEXT: cmp r3, r2 -; CHECK-NODSP-V7-NEXT: it ls -; CHECK-NODSP-V7-NEXT: movls r0, r1 -; CHECK-NODSP-V7-NEXT: cmp r12, r2 -; CHECK-NODSP-V7-NEXT: it hs -; CHECK-NODSP-V7-NEXT: movhs r0, #0 -; CHECK-NODSP-V7-NEXT: bx lr -; -; CHECK-DSP-LABEL: search_through_zext_cmp: -; CHECK-DSP: @ %bb.0: @ %entry -; CHECK-DSP-NEXT: subs r3, r1, r0 -; CHECK-DSP-NEXT: subs.w r12, r1, r0 -; CHECK-DSP-NEXT: uxtb r3, r3 -; CHECK-DSP-NEXT: it ne -; CHECK-DSP-NEXT: movne.w r12, #1 -; CHECK-DSP-NEXT: cmp r3, r2 -; CHECK-DSP-NEXT: it ls -; CHECK-DSP-NEXT: movls r0, r1 -; CHECK-DSP-NEXT: cmp 
r12, r2
-; CHECK-DSP-NEXT: it hs
-; CHECK-DSP-NEXT: movhs r0, #0
-; CHECK-DSP-NEXT: bx lr
-;
-; CHECK-DSP-IMM-LABEL: search_through_zext_cmp:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: subs.w r12, r1, r0
-; CHECK-DSP-IMM-NEXT: it ne
-; CHECK-DSP-IMM-NEXT: movne.w r12, #1
-; CHECK-DSP-IMM-NEXT: subs r3, r1, r0
-; CHECK-DSP-IMM-NEXT: uxtb r3, r3
-; CHECK-DSP-IMM-NEXT: cmp r3, r2
-; CHECK-DSP-IMM-NEXT: it hi
-; CHECK-DSP-IMM-NEXT: movhi r1, r0
-; CHECK-DSP-IMM-NEXT: movs r0, #0
-; CHECK-DSP-IMM-NEXT: cmp r12, r2
-; CHECK-DSP-IMM-NEXT: it lo
-; CHECK-DSP-IMM-NEXT: movlo r0, r1
-; CHECK-DSP-IMM-NEXT: bx lr
-entry:
-  %cmp = icmp ne i8 %a, %b
-  %conv = zext i1 %cmp to i16
-  %cmp1 = icmp ult i16 %conv, %c
-  br i1 %cmp1, label %if.then, label %if.end
-
-if.then:
-  %sub = sub nuw i8 %b, %a
-  %conv2 = zext i8 %sub to i16
-  %cmp3 = icmp ugt i16 %conv2, %c
-  %res = select i1 %cmp3, i8 %a, i8 %b
-  br label %if.end
-
-if.end:
-  %retval = phi i8 [ 0, %entry ], [ %res, %if.then ]
-  ret i8 %retval
-}
-
-define i8 @search_through_zext_load(i8* %a, i8 zeroext %b, i16 zeroext %c) {
-; CHECK-NODSP-V8-LABEL: search_through_zext_load:
-; CHECK-NODSP-V8: @ %bb.0: @ %entry
-; CHECK-NODSP-V8-NEXT: ldrb r3, [r0]
-; CHECK-NODSP-V8-NEXT: mov r0, r1
-; CHECK-NODSP-V8-NEXT: subs r1, r1, r3
-; CHECK-NODSP-V8-NEXT: cmp r1, r2
-; CHECK-NODSP-V8-NEXT: it hi
-; CHECK-NODSP-V8-NEXT: movhi r0, r3
-; CHECK-NODSP-V8-NEXT: cmp r3, r2
-; CHECK-NODSP-V8-NEXT: it hs
-; CHECK-NODSP-V8-NEXT: movhs r0, #0
-; CHECK-NODSP-V8-NEXT: bx lr
-;
-; CHECK-NODSP-V7-LABEL: search_through_zext_load:
-; CHECK-NODSP-V7: @ %bb.0: @ %entry
-; CHECK-NODSP-V7-NEXT: ldrb r0, [r0]
-; CHECK-NODSP-V7-NEXT: subs r3, r1, r0
-; CHECK-NODSP-V7-NEXT: cmp r3, r2
-; CHECK-NODSP-V7-NEXT: it hi
-; CHECK-NODSP-V7-NEXT: movhi r1, r0
-; CHECK-NODSP-V7-NEXT: cmp r0, r2
-; CHECK-NODSP-V7-NEXT: it hs
-; CHECK-NODSP-V7-NEXT: movhs r1, #0
-; CHECK-NODSP-V7-NEXT: mov r0, r1
-; CHECK-NODSP-V7-NEXT: bx lr
-;
-; CHECK-DSP-LABEL: search_through_zext_load:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: ldrb r0, [r0]
-; CHECK-DSP-NEXT: subs r3, r1, r0
-; CHECK-DSP-NEXT: cmp r3, r2
-; CHECK-DSP-NEXT: it hi
-; CHECK-DSP-NEXT: movhi r1, r0
-; CHECK-DSP-NEXT: cmp r0, r2
-; CHECK-DSP-NEXT: it hs
-; CHECK-DSP-NEXT: movhs r1, #0
-; CHECK-DSP-NEXT: mov r0, r1
-; CHECK-DSP-NEXT: bx lr
-;
-; CHECK-DSP-IMM-LABEL: search_through_zext_load:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: ldrb r3, [r0]
-; CHECK-DSP-IMM-NEXT: subs r0, r1, r3
-; CHECK-DSP-IMM-NEXT: cmp r0, r2
-; CHECK-DSP-IMM-NEXT: it hi
-; CHECK-DSP-IMM-NEXT: movhi r1, r3
-; CHECK-DSP-IMM-NEXT: movs r0, #0
-; CHECK-DSP-IMM-NEXT: cmp r3, r2
-; CHECK-DSP-IMM-NEXT: it lo
-; CHECK-DSP-IMM-NEXT: movlo r0, r1
-; CHECK-DSP-IMM-NEXT: bx lr
-entry:
-  %load = load i8, i8* %a
-  %conv = zext i8 %load to i16
-  %cmp1 = icmp ult i16 %conv, %c
-  br i1 %cmp1, label %if.then, label %if.end
-
-if.then:
-  %sub = sub nuw i8 %b, %load
-  %conv2 = zext i8 %sub to i16
-  %cmp3 = icmp ugt i16 %conv2, %c
-  %res = select i1 %cmp3, i8 %load, i8 %b
-  br label %if.end
-
-if.end:
-  %retval = phi i8 [ 0, %entry ], [ %res, %if.then ]
-  ret i8 %retval
-}
-
-define i16 @trunc_sink_less_than_cmp(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d) {
-; CHECK-NODSP-V8-LABEL: trunc_sink_less_than_cmp:
-; CHECK-NODSP-V8: @ %bb.0: @ %entry
-; CHECK-NODSP-V8-NEXT: push {r7, lr}
-; CHECK-NODSP-V8-NEXT: sub.w r12, r1, r0
-; CHECK-NODSP-V8-NEXT: adds r3, #1
-; CHECK-NODSP-V8-NEXT: uxth.w lr, r12
-; CHECK-NODSP-V8-NEXT: uxtb.w r12, r12
-; CHECK-NODSP-V8-NEXT: uxtb r3, r3
-; CHECK-NODSP-V8-NEXT: cmp r12, r3
-; CHECK-NODSP-V8-NEXT: it ls
-; CHECK-NODSP-V8-NEXT: movls r0, r1
-; CHECK-NODSP-V8-NEXT: cmp lr, r2
-; CHECK-NODSP-V8-NEXT: it hs
-; CHECK-NODSP-V8-NEXT: movhs r0, #0
-; CHECK-NODSP-V8-NEXT: pop {r7, pc}
-;
-; CHECK-NODSP-V7-LABEL: trunc_sink_less_than_cmp:
-; CHECK-NODSP-V7: @ %bb.0: @ %entry
-; CHECK-NODSP-V7-NEXT: .save {r7, lr}
-; CHECK-NODSP-V7-NEXT: push {r7, lr}
-; CHECK-NODSP-V7-NEXT: adds r3, #1
-; CHECK-NODSP-V7-NEXT: sub.w r12, r1, r0
-; CHECK-NODSP-V7-NEXT: uxtb.w lr, r12
-; CHECK-NODSP-V7-NEXT: uxtb r3, r3
-; CHECK-NODSP-V7-NEXT: cmp lr, r3
-; CHECK-NODSP-V7-NEXT: it ls
-; CHECK-NODSP-V7-NEXT: movls r0, r1
-; CHECK-NODSP-V7-NEXT: uxth.w r1, r12
-; CHECK-NODSP-V7-NEXT: cmp r1, r2
-; CHECK-NODSP-V7-NEXT: it hs
-; CHECK-NODSP-V7-NEXT: movhs r0, #0
-; CHECK-NODSP-V7-NEXT: pop {r7, pc}
-;
-; CHECK-DSP-LABEL: trunc_sink_less_than_cmp:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: push {r7, lr}
-; CHECK-DSP-NEXT: adds r3, #1
-; CHECK-DSP-NEXT: sub.w r12, r1, r0
-; CHECK-DSP-NEXT: uxtb.w lr, r12
-; CHECK-DSP-NEXT: uxtb r3, r3
-; CHECK-DSP-NEXT: cmp lr, r3
-; CHECK-DSP-NEXT: it ls
-; CHECK-DSP-NEXT: movls r0, r1
-; CHECK-DSP-NEXT: uxth.w r1, r12
-; CHECK-DSP-NEXT: cmp r1, r2
-; CHECK-DSP-NEXT: it hs
-; CHECK-DSP-NEXT: movhs r0, #0
-; CHECK-DSP-NEXT: pop {r7, pc}
-;
-; CHECK-DSP-IMM-LABEL: trunc_sink_less_than_cmp:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: push {r7, lr}
-; CHECK-DSP-IMM-NEXT: adds r3, #1
-; CHECK-DSP-IMM-NEXT: sub.w r12, r1, r0
-; CHECK-DSP-IMM-NEXT: uxtb r3, r3
-; CHECK-DSP-IMM-NEXT: uxtb.w lr, r12
-; CHECK-DSP-IMM-NEXT: cmp lr, r3
-; CHECK-DSP-IMM-NEXT: it hi
-; CHECK-DSP-IMM-NEXT: movhi r1, r0
-; CHECK-DSP-IMM-NEXT: movs r0, #0
-; CHECK-DSP-IMM-NEXT: uxth.w r3, r12
-; CHECK-DSP-IMM-NEXT: cmp r3, r2
-; CHECK-DSP-IMM-NEXT: it lo
-; CHECK-DSP-IMM-NEXT: movlo r0, r1
-; CHECK-DSP-IMM-NEXT: pop {r7, pc}
-entry:
-  %sub = sub nuw i16 %b, %a
-  %cmp = icmp ult i16 %sub, %c
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  %trunc = trunc i16 %sub to i8
-  %add = add nuw i8 %d, 1
-  %cmp2 = icmp ugt i8 %trunc, %add
-  %res = select i1 %cmp2, i16 %a, i16 %b
-  br label %if.end
-
-if.end:
-  %retval = phi i16 [ 0, %entry ], [ %res, %if.then ]
-  ret i16 %retval
-}
-
-; TODO: We should be able to remove the uxth introduced to handle %sub
-define i16 @trunc_sink_less_than_arith(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8 zeroext %e) {
-; CHECK-NODSP-V8-LABEL: trunc_sink_less_than_arith:
-; CHECK-NODSP-V8: @ %bb.0: @ %entry
-; CHECK-NODSP-V8-NEXT: push {r4, lr}
-; CHECK-NODSP-V8-NEXT: sub.w lr, r1, r0
-; CHECK-NODSP-V8-NEXT: ldr.w r12, [sp, #8]
-; CHECK-NODSP-V8-NEXT: add r3, lr
-; CHECK-NODSP-V8-NEXT: uxtb r3, r3
-; CHECK-NODSP-V8-NEXT: uxth.w r4, lr
-; CHECK-NODSP-V8-NEXT: cmp r12, r3
-; CHECK-NODSP-V8-NEXT: it ls
-; CHECK-NODSP-V8-NEXT: movls r0, r1
-; CHECK-NODSP-V8-NEXT: cmp r4, r2
-; CHECK-NODSP-V8-NEXT: it hs
-; CHECK-NODSP-V8-NEXT: movhs r0, #0
-; CHECK-NODSP-V8-NEXT: pop {r4, pc}
-;
-; CHECK-NODSP-V7-LABEL: trunc_sink_less_than_arith:
-; CHECK-NODSP-V7: @ %bb.0: @ %entry
-; CHECK-NODSP-V7-NEXT: .save {r7, lr}
-; CHECK-NODSP-V7-NEXT: push {r7, lr}
-; CHECK-NODSP-V7-NEXT: sub.w lr, r1, r0
-; CHECK-NODSP-V7-NEXT: ldr.w r12, [sp, #8]
-; CHECK-NODSP-V7-NEXT: add r3, lr
-; CHECK-NODSP-V7-NEXT: uxtb r3, r3
-; CHECK-NODSP-V7-NEXT: cmp r12, r3
-; CHECK-NODSP-V7-NEXT: it ls
-; CHECK-NODSP-V7-NEXT: movls r0, r1
-; CHECK-NODSP-V7-NEXT: uxth.w r1, lr
-; CHECK-NODSP-V7-NEXT: cmp r1, r2
-; CHECK-NODSP-V7-NEXT: it hs
-; CHECK-NODSP-V7-NEXT: movhs r0, #0
-; CHECK-NODSP-V7-NEXT: pop {r7, pc}
-;
-; CHECK-DSP-LABEL: trunc_sink_less_than_arith:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: push {r7, lr}
-; CHECK-DSP-NEXT: sub.w r12, r1, r0
-; CHECK-DSP-NEXT: add r3, r12
-; CHECK-DSP-NEXT: uxtb.w lr, r3
-; CHECK-DSP-NEXT: ldr r3, [sp, #8]
-; CHECK-DSP-NEXT: cmp r3, lr
-; CHECK-DSP-NEXT: it ls
-; CHECK-DSP-NEXT: movls r0, r1
-; CHECK-DSP-NEXT: uxth.w r1, r12
-; CHECK-DSP-NEXT: cmp r1, r2
-; CHECK-DSP-NEXT: it hs
-; CHECK-DSP-NEXT: movhs r0, #0
-; CHECK-DSP-NEXT: pop {r7, pc}
-;
-; CHECK-DSP-IMM-LABEL: trunc_sink_less_than_arith:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: push {r7, lr}
-; CHECK-DSP-IMM-NEXT: sub.w lr, r1, r0
-; CHECK-DSP-IMM-NEXT: ldr.w r12, [sp, #8]
-; CHECK-DSP-IMM-NEXT: add r3, lr
-; CHECK-DSP-IMM-NEXT: uxtb r3, r3
-; CHECK-DSP-IMM-NEXT: cmp r12, r3
-; CHECK-DSP-IMM-NEXT: it hi
-; CHECK-DSP-IMM-NEXT: movhi r1, r0
-; CHECK-DSP-IMM-NEXT: movs r0, #0
-; CHECK-DSP-IMM-NEXT: uxth.w r3, lr
-; CHECK-DSP-IMM-NEXT: cmp r3, r2
-; CHECK-DSP-IMM-NEXT: it lo
-; CHECK-DSP-IMM-NEXT: movlo r0, r1
-; CHECK-DSP-IMM-NEXT: pop {r7, pc}
-entry:
-  %sub = sub nuw i16 %b, %a
-  %cmp = icmp ult i16 %sub, %c
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  %trunc = trunc i16 %sub to i8
-  %add = add nuw i8 %d, %trunc
-  %cmp2 = icmp ugt i8 %e, %add
-  %res = select i1 %cmp2, i16 %a, i16 %b
-  br label %if.end
-
-if.end:
-  %retval = phi i16 [ 0, %entry ], [ %res, %if.then ]
-  ret i16 %retval
-}
-
-define i16 @trunc_sink_less_than_store(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8* %e) {
-; CHECK-NODSP-LABEL: trunc_sink_less_than_store:
-; CHECK-NODSP: @ %bb.0: @ %entry
-; CHECK-NODSP-NEXT: subs r0, r1, r0
-; CHECK-NODSP-NEXT: cmp r0, r2
-; CHECK-NODSP-NEXT: iteee hs
-; CHECK-NODSP-NEXT: movhs r0, #0
-; CHECK-NODSP-NEXT: ldrlo r1, [sp]
-; CHECK-NODSP-NEXT: addlo r2, r3, r0
-; CHECK-NODSP-NEXT: strblo r2, [r1]
-; CHECK-NODSP-NEXT: bx lr
-;
-; CHECK-DSP-LABEL: trunc_sink_less_than_store:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: subs r0, r1, r0
-; CHECK-DSP-NEXT: cmp r0, r2
-; CHECK-DSP-NEXT: iteee hs
-; CHECK-DSP-NEXT: movhs r0, #0
-; CHECK-DSP-NEXT: ldrlo r1, [sp]
-; CHECK-DSP-NEXT: addlo r2, r3, r0
-; CHECK-DSP-NEXT: strblo r2, [r1]
-; CHECK-DSP-NEXT: bx lr
-;
-; CHECK-DSP-IMM-LABEL: trunc_sink_less_than_store:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: subs r0, r1, r0
-; CHECK-DSP-IMM-NEXT: cmp r0, r2
-; CHECK-DSP-IMM-NEXT: bhs .LBB19_2
-; CHECK-DSP-IMM-NEXT: @ %bb.1: @ %if.then
-; CHECK-DSP-IMM-NEXT: ldr r1, [sp]
-; CHECK-DSP-IMM-NEXT: adds r2, r3, r0
-; CHECK-DSP-IMM-NEXT: strb r2, [r1]
-; CHECK-DSP-IMM-NEXT: bx lr
-; CHECK-DSP-IMM-NEXT: .LBB19_2:
-; CHECK-DSP-IMM-NEXT: movs r0, #0
-; CHECK-DSP-IMM-NEXT: bx lr
-entry:
-  %sub = sub nuw i16 %b, %a
-  %cmp = icmp ult i16 %sub, %c
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  %trunc = trunc i16 %sub to i8
-  %add = add nuw i8 %d, %trunc
-  store i8 %add, i8* %e
-  br label %if.end
-
-if.end:
-  %retval = phi i16 [ 0, %entry ], [ %sub, %if.then ]
-  ret i16 %retval
-}
-
-define i8 @trunc_sink_less_than_ret(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8 zeroext %e) {
-; CHECK-NODSP-LABEL: trunc_sink_less_than_ret:
-; CHECK-NODSP: @ %bb.0: @ %entry
-; CHECK-NODSP-NEXT: subs r1, r1, r0
-; CHECK-NODSP-NEXT: movs r0, #0
-; CHECK-NODSP-NEXT: cmp r1, r2
-; CHECK-NODSP-NEXT: it lo
-; CHECK-NODSP-NEXT: uxtablo r0, r3, r1
-; CHECK-NODSP-NEXT: bx lr
-;
-; CHECK-DSP-LABEL: trunc_sink_less_than_ret:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: subs r1, r1, r0
-; CHECK-DSP-NEXT: movs r0, #0
-; CHECK-DSP-NEXT: cmp r1, r2
-; CHECK-DSP-NEXT: it lo
-; CHECK-DSP-NEXT: uxtablo r0, r3, r1
-; CHECK-DSP-NEXT: bx lr
-;
-; CHECK-DSP-IMM-LABEL: trunc_sink_less_than_ret:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: subs r1, r1, r0
-; CHECK-DSP-IMM-NEXT: movs r0, #0
-; CHECK-DSP-IMM-NEXT: cmp r1, r2
-; CHECK-DSP-IMM-NEXT: uxtab r3, r3, r1
-; CHECK-DSP-IMM-NEXT: it lo
-; CHECK-DSP-IMM-NEXT: movlo r0, r3
-; CHECK-DSP-IMM-NEXT: bx lr
-entry:
-  %sub = sub nuw i16 %b, %a
-  %cmp = icmp ult i16 %sub, %c
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  %trunc = trunc i16 %sub to i8
-  %add = add nuw i8 %d, %trunc
-  br label %if.end
-
-if.end:
-  %retval = phi i8 [ 0, %entry ], [ %add, %if.then ]
-  ret i8 %retval
-}
-
-define zeroext i8 @trunc_sink_less_than_zext_ret(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8 zeroext %e) {
-; CHECK-NODSP-LABEL: trunc_sink_less_than_zext_ret:
-; CHECK-NODSP: @ %bb.0: @ %entry
-; CHECK-NODSP-NEXT: subs r0, r1, r0
-; CHECK-NODSP-NEXT: movs r1, #0
-; CHECK-NODSP-NEXT: cmp r0, r2
-; CHECK-NODSP-NEXT: it lo
-; CHECK-NODSP-NEXT: addlo r1, r3, r0
-; CHECK-NODSP-NEXT: uxtb r0, r1
-; CHECK-NODSP-NEXT: bx lr
-;
-; CHECK-DSP-LABEL: trunc_sink_less_than_zext_ret:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: subs r0, r1, r0
-; CHECK-DSP-NEXT: movs r1, #0
-; CHECK-DSP-NEXT: cmp r0, r2
-; CHECK-DSP-NEXT: it lo
-; CHECK-DSP-NEXT: addlo r1, r3, r0
-; CHECK-DSP-NEXT: uxtb r0, r1
-; CHECK-DSP-NEXT: bx lr
-;
-; CHECK-DSP-IMM-LABEL: trunc_sink_less_than_zext_ret:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: subs r0, r1, r0
-; CHECK-DSP-IMM-NEXT: adds r1, r3, r0
-; CHECK-DSP-IMM-NEXT: movs r3, #0
-; CHECK-DSP-IMM-NEXT: cmp r0, r2
-; CHECK-DSP-IMM-NEXT: it lo
-; CHECK-DSP-IMM-NEXT: movlo r3, r1
-; CHECK-DSP-IMM-NEXT: uxtb r0, r3
-; CHECK-DSP-IMM-NEXT: bx lr
-entry:
-  %sub = sub nuw i16 %b, %a
-  %cmp = icmp ult i16 %sub, %c
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:
-  %trunc = trunc i16 %sub to i8
-  %add = add nuw i8 %d, %trunc
-  br label %if.end
-
-if.end:
-  %retval = phi i8 [ 0, %entry ], [ %add, %if.then ]
-  ret i8 %retval
-}
-
-define i32 @bitcast_i1(i16 zeroext %a, i32 %b, i32 %c) {
-; CHECK-LABEL: bitcast_i1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: ands r0, r0, #1
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r0, r1
-; CHECK-NEXT: bx lr
-entry:
-  %0 = bitcast i1 1 to i1
-  %1 = trunc i16 %a to i1
-  %cmp = icmp eq i1 %1, %0
-  br i1 %cmp, label %if.then, label %exit
-
-if.then:
-  %conv = zext i1 %0 to i16
-  %conv1 = zext i1 %1 to i16
-  %cmp1 = icmp uge i16 %conv, %conv1
-  %select = select i1 %cmp1, i32 %b, i32 %c
-  br label %exit
-
-exit:
-  %retval = phi i32 [ %select, %if.then ], [ 0, %entry ]
-  ret i32 %retval
-}
-
-define void @search_back_through_trunc(i8* %a, i8* %b, i8* %c, i8* %d, i16* %e) {
-; CHECK-NODSP-V8-LABEL: search_back_through_trunc:
-; CHECK-NODSP-V8: @ %bb.0: @ %entry
-; CHECK-NODSP-V8-NEXT: push {r7, lr}
-; CHECK-NODSP-V8-NEXT: ldrb.w r12, [r0]
-; CHECK-NODSP-V8-NEXT: ldrb.w lr, [r1]
-; CHECK-NODSP-V8-NEXT: ldrb r1, [r2]
-; CHECK-NODSP-V8-NEXT: ldrb r0, [r3]
-; CHECK-NODSP-V8-NEXT: orr.w r12, lr, r12, lsl #8
-; CHECK-NODSP-V8-NEXT: orr.w r0, r0, r1, lsl #8
-; CHECK-NODSP-V8-NEXT: cmp r12, r0
-; CHECK-NODSP-V8-NEXT: beq .LBB23_2
-; CHECK-NODSP-V8-NEXT: @ %bb.1: @ %if.else136
-; CHECK-NODSP-V8-NEXT: ldr r0, [sp, #8]
-; CHECK-NODSP-V8-NEXT: ldrh r0, [r0]
-; CHECK-NODSP-V8-NEXT: uxtb.w lr, r0
-; CHECK-NODSP-V8-NEXT: lsrs r1, r0, #8
-; CHECK-NODSP-V8-NEXT: .LBB23_2: @ %if.end183
-; CHECK-NODSP-V8-NEXT: strb r1, [r2]
-; CHECK-NODSP-V8-NEXT: strb.w lr, [r3]
-; CHECK-NODSP-V8-NEXT: pop {r7, pc}
-;
-; CHECK-NODSP-V7-LABEL: search_back_through_trunc:
-; CHECK-NODSP-V7: @ %bb.0: @ %entry
-; CHECK-NODSP-V7-NEXT: .save {r4, lr}
-; CHECK-NODSP-V7-NEXT: push {r4, lr}
-; CHECK-NODSP-V7-NEXT: ldrb r4, [r0]
-; CHECK-NODSP-V7-NEXT: ldrb.w r12, [r2]
-; CHECK-NODSP-V7-NEXT: ldrb r0, [r1]
-; CHECK-NODSP-V7-NEXT: ldrb.w lr, [r3]
-; CHECK-NODSP-V7-NEXT: orr.w r4, r0, r4, lsl #8
-; CHECK-NODSP-V7-NEXT: orr.w r1, lr, r12, lsl #8
-; CHECK-NODSP-V7-NEXT: cmp r4, r1
-; CHECK-NODSP-V7-NEXT: itttt ne
-; CHECK-NODSP-V7-NEXT: ldrne r0, [sp, #8]
-; CHECK-NODSP-V7-NEXT: ldrhne r0, [r0]
-; CHECK-NODSP-V7-NEXT: lsrne.w r12, r0, #8
-; CHECK-NODSP-V7-NEXT: uxtbne r0, r0
-; CHECK-NODSP-V7-NEXT: strb.w r12, [r2]
-; CHECK-NODSP-V7-NEXT: strb r0, [r3]
-; CHECK-NODSP-V7-NEXT: pop {r4, pc}
-;
-; CHECK-DSP-LABEL: search_back_through_trunc:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: push {r4, lr}
-; CHECK-DSP-NEXT: ldrb r4, [r0]
-; CHECK-DSP-NEXT: ldrb r0, [r1]
-; CHECK-DSP-NEXT: ldrb.w r12, [r2]
-; CHECK-DSP-NEXT: ldrb.w lr, [r3]
-; CHECK-DSP-NEXT: orr.w lr, lr, r12, lsl #8
-; CHECK-DSP-NEXT: orr.w r1, r0, r4, lsl #8
-; CHECK-DSP-NEXT: cmp r1, lr
-; CHECK-DSP-NEXT: itttt ne
-; CHECK-DSP-NEXT: ldrne r0, [sp, #8]
-; CHECK-DSP-NEXT: ldrhne r0, [r0]
-; CHECK-DSP-NEXT: lsrne.w r12, r0, #8
-; CHECK-DSP-NEXT: uxtbne r0, r0
-; CHECK-DSP-NEXT: strb.w r12, [r2]
-; CHECK-DSP-NEXT: strb r0, [r3]
-; CHECK-DSP-NEXT: pop {r4, pc}
-;
-; CHECK-DSP-IMM-LABEL: search_back_through_trunc:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: push {r4, lr}
-; CHECK-DSP-IMM-NEXT: ldrb r4, [r0]
-; CHECK-DSP-IMM-NEXT: ldrb.w r12, [r2]
-; CHECK-DSP-IMM-NEXT: ldrb r0, [r1]
-; CHECK-DSP-IMM-NEXT: ldrb.w lr, [r3]
-; CHECK-DSP-IMM-NEXT: orr.w r4, r0, r4, lsl #8
-; CHECK-DSP-IMM-NEXT: orr.w r1, lr, r12, lsl #8
-; CHECK-DSP-IMM-NEXT: cmp r4, r1
-; CHECK-DSP-IMM-NEXT: beq .LBB23_2
-; CHECK-DSP-IMM-NEXT: @ %bb.1: @ %if.else136
-; CHECK-DSP-IMM-NEXT: ldr r0, [sp, #8]
-; CHECK-DSP-IMM-NEXT: ldrh r0, [r0]
-; CHECK-DSP-IMM-NEXT: lsr.w r12, r0, #8
-; CHECK-DSP-IMM-NEXT: uxtb r0, r0
-; CHECK-DSP-IMM-NEXT: .LBB23_2: @ %if.end183
-; CHECK-DSP-IMM-NEXT: strb.w r12, [r2]
-; CHECK-DSP-IMM-NEXT: strb r0, [r3]
-; CHECK-DSP-IMM-NEXT: pop {r4, pc}
-entry:
-  %0 = load i8, i8* %a, align 1
-  %conv106 = zext i8 %0 to i16
-  %shl = shl nuw i16 %conv106, 8
-  %1 = load i8, i8* %b, align 1
-  %conv108 = zext i8 %1 to i16
-  %or109 = or i16 %shl, %conv108
-  %2 = load i8, i8* %c, align 1
-  %conv119 = zext i8 %2 to i16
-  %shl120 = shl nuw i16 %conv119, 8
-  %3 = load i8, i8* %d, align 1
-  %conv122 = zext i8 %3 to i16
-  %or123 = or i16 %shl120, %conv122
-  %cmp133 = icmp eq i16 %or109, %or123
-  br i1 %cmp133, label %if.end183, label %if.else136
-
-if.else136:
-  %4 = load i16, i16* %e, align 2
-  %extract.t854 = trunc i16 %4 to i8
-  %extract856 = lshr i16 %4, 8
-  %extract.t857 = trunc i16 %extract856 to i8
-  br label %if.end183
-
-if.end183:
-  %w.0.off0 = phi i8 [ %extract.t854, %if.else136 ], [ %1, %entry ]
-  %w.0.off8 = phi i8 [ %extract.t857, %if.else136 ], [ %2, %entry ]
-  store i8 %w.0.off8, i8* %c, align 1
-  store i8 %w.0.off0, i8* %d, align 1
-  ret void
-}
-
-@c = common dso_local local_unnamed_addr global i16 0, align 2
-@b = common dso_local local_unnamed_addr global i16 0, align 2
-@f = common dso_local local_unnamed_addr global i32 0, align 4
-@e = common dso_local local_unnamed_addr global i8 0, align 1
-@a = common dso_local local_unnamed_addr global i8 0, align 1
-@d = common dso_local local_unnamed_addr global i32 0, align 4
-
-define void @and_trunc_two_zext() {
-; CHECK-NODSP-V8-LABEL: and_trunc_two_zext:
-; CHECK-NODSP-V8: @ %bb.0: @ %entry
-; CHECK-NODSP-V8-NEXT: movw r1, :lower16:b
-; CHECK-NODSP-V8-NEXT: movt r1, :upper16:b
-; CHECK-NODSP-V8-NEXT: ldrh r1, [r1]
-; CHECK-NODSP-V8-NEXT: movw r3, :lower16:f
-; CHECK-NODSP-V8-NEXT: sxth r2, r1
-; CHECK-NODSP-V8-NEXT: movt r3, :upper16:f
-; CHECK-NODSP-V8-NEXT: str r2, [r3]
-; CHECK-NODSP-V8-NEXT: movw r3, :lower16:a
-; CHECK-NODSP-V8-NEXT: movt r3, :upper16:a
-; CHECK-NODSP-V8-NEXT: movw r0, :lower16:c
-; CHECK-NODSP-V8-NEXT: movw r2, :lower16:e
-; CHECK-NODSP-V8-NEXT: ldrb r3, [r3]
-; CHECK-NODSP-V8-NEXT: movt r0, :upper16:c
-; CHECK-NODSP-V8-NEXT: and r1, r1, #1
-; CHECK-NODSP-V8-NEXT: movt r2, :upper16:e
-; CHECK-NODSP-V8-NEXT: ldrh r0, [r0]
-; CHECK-NODSP-V8-NEXT: strb r1, [r2]
-; CHECK-NODSP-V8-NEXT: muls r1, r3, r1
-; CHECK-NODSP-V8-NEXT: uxtb r1, r1
-; CHECK-NODSP-V8-NEXT: movw r2, :lower16:d
-; CHECK-NODSP-V8-NEXT: orrs r0, r1
-; CHECK-NODSP-V8-NEXT: movt r2, :upper16:d
-; CHECK-NODSP-V8-NEXT: lsls r0, r0, #16
-; CHECK-NODSP-V8-NEXT: str r1, [r2]
-; CHECK-NODSP-V8-NEXT: it eq
-; CHECK-NODSP-V8-NEXT: bxeq lr
-; CHECK-NODSP-V8-NEXT: .p2align 2
-; CHECK-NODSP-V8-NEXT: .LBB24_1: @ %for.cond
-; CHECK-NODSP-V8-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NODSP-V8-NEXT: b .LBB24_1
-;
-; CHECK-NODSP-V7-LABEL: and_trunc_two_zext:
-; CHECK-NODSP-V7: @ %bb.0: @ %entry
-; CHECK-NODSP-V7-NEXT: movw r1, :lower16:b
-; CHECK-NODSP-V7-NEXT: movw r2, :lower16:a
-; CHECK-NODSP-V7-NEXT: movt r1, :upper16:b
-; CHECK-NODSP-V7-NEXT: movt r2, :upper16:a
-; CHECK-NODSP-V7-NEXT: ldrh r1, [r1]
-; CHECK-NODSP-V7-NEXT: movw r0, :lower16:c
-; CHECK-NODSP-V7-NEXT: ldrb r2, [r2]
-; CHECK-NODSP-V7-NEXT: movt r0, :upper16:c
-; CHECK-NODSP-V7-NEXT: and r3, r1, #1
-; CHECK-NODSP-V7-NEXT: ldrh.w r12, [r0]
-; CHECK-NODSP-V7-NEXT: movw r0, :lower16:e
-; CHECK-NODSP-V7-NEXT: muls r2, r3, r2
-; CHECK-NODSP-V7-NEXT: movt r0, :upper16:e
-; CHECK-NODSP-V7-NEXT: strb r3, [r0]
-; CHECK-NODSP-V7-NEXT: sxth r0, r1
-; CHECK-NODSP-V7-NEXT: movw r1, :lower16:f
-; CHECK-NODSP-V7-NEXT: movt r1, :upper16:f
-; CHECK-NODSP-V7-NEXT: str r0, [r1]
-; CHECK-NODSP-V7-NEXT: movw r1, :lower16:d
-; CHECK-NODSP-V7-NEXT: movt r1, :upper16:d
-; CHECK-NODSP-V7-NEXT: uxtb r0, r2
-; CHECK-NODSP-V7-NEXT: str r0, [r1]
-; CHECK-NODSP-V7-NEXT: orr.w r0, r0, r12
-; CHECK-NODSP-V7-NEXT: lsls r0, r0, #16
-; CHECK-NODSP-V7-NEXT: it eq
-; CHECK-NODSP-V7-NEXT: bxeq lr
-; CHECK-NODSP-V7-NEXT: .LBB24_1: @ %for.cond
-; CHECK-NODSP-V7-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NODSP-V7-NEXT: b .LBB24_1
-;
-; CHECK-DSP-LABEL: and_trunc_two_zext:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: movw r0, :lower16:b
-; CHECK-DSP-NEXT: movw r2, :lower16:f
-; CHECK-DSP-NEXT: movt r0, :upper16:b
-; CHECK-DSP-NEXT: movt r2, :upper16:f
-; CHECK-DSP-NEXT: ldrh r0, [r0]
-; CHECK-DSP-NEXT: sxth r1, r0
-; CHECK-DSP-NEXT: and r0, r0, #1
-; CHECK-DSP-NEXT: str r1, [r2]
-; CHECK-DSP-NEXT: movw r1, :lower16:e
-; CHECK-DSP-NEXT: movt r1, :upper16:e
-; CHECK-DSP-NEXT: strb r0, [r1]
-; CHECK-DSP-NEXT: movw r1, :lower16:a
-; CHECK-DSP-NEXT: movt r1, :upper16:a
-; CHECK-DSP-NEXT: ldrb r1, [r1]
-; CHECK-DSP-NEXT: muls r0, r1, r0
-; CHECK-DSP-NEXT: movw r1, :lower16:d
-; CHECK-DSP-NEXT: uxtb r0, r0
-; CHECK-DSP-NEXT: movt r1, :upper16:d
-; CHECK-DSP-NEXT: str r0, [r1]
-; CHECK-DSP-NEXT: movw r1, :lower16:c
-; CHECK-DSP-NEXT: movt r1, :upper16:c
-; CHECK-DSP-NEXT: ldrh r1, [r1]
-; CHECK-DSP-NEXT: orrs r0, r1
-; CHECK-DSP-NEXT: lsls r0, r0, #16
-; CHECK-DSP-NEXT: it eq
-; CHECK-DSP-NEXT: bxeq lr
-; CHECK-DSP-NEXT: .LBB24_1: @ %for.cond
-; CHECK-DSP-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-DSP-NEXT: b .LBB24_1
-;
-; CHECK-DSP-IMM-LABEL: and_trunc_two_zext:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: movw r1, :lower16:b
-; CHECK-DSP-IMM-NEXT: movw r2, :lower16:a
-; CHECK-DSP-IMM-NEXT: movt r1, :upper16:b
-; CHECK-DSP-IMM-NEXT: movt r2, :upper16:a
-; CHECK-DSP-IMM-NEXT: ldrh r1, [r1]
-; CHECK-DSP-IMM-NEXT: movw r0, :lower16:c
-; CHECK-DSP-IMM-NEXT: ldrb r2, [r2]
-; CHECK-DSP-IMM-NEXT: movt r0, :upper16:c
-; CHECK-DSP-IMM-NEXT: and r3, r1, #1
-; CHECK-DSP-IMM-NEXT: ldrh.w r12, [r0]
-; CHECK-DSP-IMM-NEXT: movw r0, :lower16:e
-; CHECK-DSP-IMM-NEXT: muls r2, r3, r2
-; CHECK-DSP-IMM-NEXT: movt r0, :upper16:e
-; CHECK-DSP-IMM-NEXT: strb r3, [r0]
-; CHECK-DSP-IMM-NEXT: sxth r0, r1
-; CHECK-DSP-IMM-NEXT: movw r1, :lower16:f
-; CHECK-DSP-IMM-NEXT: movt r1, :upper16:f
-; CHECK-DSP-IMM-NEXT: str r0, [r1]
-; CHECK-DSP-IMM-NEXT: movw r1, :lower16:d
-; CHECK-DSP-IMM-NEXT: uxtb r0, r2
-; CHECK-DSP-IMM-NEXT: movt r1, :upper16:d
-; CHECK-DSP-IMM-NEXT: str r0, [r1]
-; CHECK-DSP-IMM-NEXT: orr.w r0, r0, r12
-; CHECK-DSP-IMM-NEXT: lsls r0, r0, #16
-; CHECK-DSP-IMM-NEXT: beq .LBB24_2
-; CHECK-DSP-IMM-NEXT: .LBB24_1: @ %for.cond
-; CHECK-DSP-IMM-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-DSP-IMM-NEXT: b .LBB24_1
-; CHECK-DSP-IMM-NEXT: .LBB24_2: @ %if.end
-; CHECK-DSP-IMM-NEXT: bx lr
-entry:
-  %0 = load i16, i16* @c, align 2
-  %1 = load i16, i16* @b, align 2
-  %conv = sext i16 %1 to i32
-  store i32 %conv, i32* @f, align 4
-  %2 = trunc i16 %1 to i8
-  %conv1 = and i8 %2, 1
-  store i8 %conv1, i8* @e, align 1
-  %3 = load i8, i8* @a, align 1
-  %narrow = mul nuw i8 %3, %conv1
-  %mul = zext i8 %narrow to i32
-  store i32 %mul, i32* @d, align 4
-  %4 = zext i8 %narrow to i16
-  %conv5 = or i16 %0, %4
-  %tobool = icmp eq i16 %conv5, 0
-  br i1 %tobool, label %if.end, label %for.cond
-
-for.cond:
-  br label %for.cond
-
-if.end:
-  ret void
-}
-
-define void @zext_urem_trunc() {
-; CHECK-NODSP-V8-LABEL: zext_urem_trunc:
-; CHECK-NODSP-V8: @ %bb.0: @ %entry
-; CHECK-NODSP-V8-NEXT: movw r0, :lower16:c
-; CHECK-NODSP-V8-NEXT: movt r0, :upper16:c
-; CHECK-NODSP-V8-NEXT: ldrh r1, [r0]
-; CHECK-NODSP-V8-NEXT: movw r0, :lower16:e
-; CHECK-NODSP-V8-NEXT: movt r0, :upper16:e
-; CHECK-NODSP-V8-NEXT: ldrb r0, [r0]
-; CHECK-NODSP-V8-NEXT: cbz r1, .LBB25_2
-; CHECK-NODSP-V8-NEXT: @ %bb.1: @ %cond.false
-; CHECK-NODSP-V8-NEXT: udiv r2, r0, r1
-; CHECK-NODSP-V8-NEXT: mls r0, r2, r1, r0
-; CHECK-NODSP-V8-NEXT: .LBB25_2: @ %cond.end
-; CHECK-NODSP-V8-NEXT: movw r1, :lower16:a
-; CHECK-NODSP-V8-NEXT: movt r1, :upper16:a
-; CHECK-NODSP-V8-NEXT: strb r0, [r1]
-; CHECK-NODSP-V8-NEXT: bx lr
-;
-; CHECK-NODSP-V7-LABEL: zext_urem_trunc:
-; CHECK-NODSP-V7: @ %bb.0: @ %entry
-; CHECK-NODSP-V7-NEXT: .save {r7, lr}
-; CHECK-NODSP-V7-NEXT: push {r7, lr}
-; CHECK-NODSP-V7-NEXT: movw r0, :lower16:e
-; CHECK-NODSP-V7-NEXT: movw r1, :lower16:c
-; CHECK-NODSP-V7-NEXT: movt r0, :upper16:e
-; CHECK-NODSP-V7-NEXT: movt r1, :upper16:c
-; CHECK-NODSP-V7-NEXT: ldrh r1, [r1]
-; CHECK-NODSP-V7-NEXT: ldrb r0, [r0]
-; CHECK-NODSP-V7-NEXT: cbz r1, .LBB25_2
-; CHECK-NODSP-V7-NEXT: @ %bb.1: @ %cond.false
-; CHECK-NODSP-V7-NEXT: bl __aeabi_uidivmod
-; CHECK-NODSP-V7-NEXT: mov r0, r1
-; CHECK-NODSP-V7-NEXT: .LBB25_2: @ %cond.end
-; CHECK-NODSP-V7-NEXT: movw r1, :lower16:a
-; CHECK-NODSP-V7-NEXT: movt r1, :upper16:a
-; CHECK-NODSP-V7-NEXT: strb r0, [r1]
-; CHECK-NODSP-V7-NEXT: pop {r7, pc}
-;
-; CHECK-DSP-LABEL: zext_urem_trunc:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: movw r1, :lower16:c
-; CHECK-DSP-NEXT: movw r0, :lower16:e
-; CHECK-DSP-NEXT: movt r1, :upper16:c
-; CHECK-DSP-NEXT: movt r0, :upper16:e
-; CHECK-DSP-NEXT: ldrh r1, [r1]
-; CHECK-DSP-NEXT: ldrb r0, [r0]
-; CHECK-DSP-NEXT: cmp r1, #0
-; CHECK-DSP-NEXT: itt ne
-; CHECK-DSP-NEXT: udivne r2, r0, r1
-; CHECK-DSP-NEXT: mlsne r0, r2, r1, r0
-; CHECK-DSP-NEXT: movw r1, :lower16:a
-; CHECK-DSP-NEXT: movt r1, :upper16:a
-; CHECK-DSP-NEXT: strb r0, [r1]
-; CHECK-DSP-NEXT: bx lr
-;
-; CHECK-DSP-IMM-LABEL: zext_urem_trunc:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: movw r0, :lower16:e
-; CHECK-DSP-IMM-NEXT: movw r1, :lower16:c
-; CHECK-DSP-IMM-NEXT: movt r0, :upper16:e
-; CHECK-DSP-IMM-NEXT: movt r1, :upper16:c
-; CHECK-DSP-IMM-NEXT: ldrh r1, [r1]
-; CHECK-DSP-IMM-NEXT: ldrb r0, [r0]
-; CHECK-DSP-IMM-NEXT: cbz r1, .LBB25_2
-; CHECK-DSP-IMM-NEXT: @ %bb.1: @ %cond.false
-; CHECK-DSP-IMM-NEXT: udiv r2, r0, r1
-; CHECK-DSP-IMM-NEXT: mls r0, r2, r1, r0
-; CHECK-DSP-IMM-NEXT: .LBB25_2: @ %cond.end
-; CHECK-DSP-IMM-NEXT: movw r1, :lower16:a
-; CHECK-DSP-IMM-NEXT: movt r1, :upper16:a
-; CHECK-DSP-IMM-NEXT: strb r0, [r1]
-; CHECK-DSP-IMM-NEXT: bx lr
-entry:
-  %0 = load i16, i16* @c, align 2
-  %cmp = icmp eq i16 %0, 0
-  %1 = load i8, i8* @e, align 1
-  br i1 %cmp, label %cond.end, label %cond.false
-
-cond.false:
-  %rem.lhs.trunc = zext i8 %1 to i16
-  %rem7 = urem i16 %rem.lhs.trunc, %0
-  %rem.zext = trunc i16 %rem7 to i8
-  br label %cond.end
-
-cond.end:
-  %cond = phi i8 [ %rem.zext, %cond.false ], [ %1, %entry ]
-  store i8 %cond, i8* @a, align 1
-  ret void
-}
-
-define i1 @dont_replace_trunc_1(i8* %a, i16* %b, i16* %c, i32* %d, i8* %e, i32* %f) {
-; CHECK-NODSP-V8-LABEL: dont_replace_trunc_1:
-; CHECK-NODSP-V8: @ %bb.0: @ %entry
-; CHECK-NODSP-V8-NEXT: push {r4, lr}
-; CHECK-NODSP-V8-NEXT: ldrh r1, [r1]
-; CHECK-NODSP-V8-NEXT: ldrd r12, lr, [sp, #8]
-; CHECK-NODSP-V8-NEXT: sxth r4, r1
-; CHECK-NODSP-V8-NEXT: and r1, r1, #1
-; CHECK-NODSP-V8-NEXT: ldrh r2, [r2]
-; CHECK-NODSP-V8-NEXT: str.w r4, [lr]
-; CHECK-NODSP-V8-NEXT: strb.w r1, [r12]
-; CHECK-NODSP-V8-NEXT: ldrb r0, [r0]
-; CHECK-NODSP-V8-NEXT: muls r0, r1, r0
-; CHECK-NODSP-V8-NEXT: uxtb r1, r0
-; CHECK-NODSP-V8-NEXT: orr.w r0, r2, r1
-; CHECK-NODSP-V8-NEXT: uxth r0, r0
-; CHECK-NODSP-V8-NEXT: clz r0, r0
-; CHECK-NODSP-V8-NEXT: lsrs r0, r0, #5
-; CHECK-NODSP-V8-NEXT: str r1, [r3]
-; CHECK-NODSP-V8-NEXT: pop {r4, pc}
-;
-; CHECK-NODSP-V7-LABEL: dont_replace_trunc_1:
-; CHECK-NODSP-V7: @ %bb.0: @ %entry
-; CHECK-NODSP-V7-NEXT: .save {r4, lr}
-; CHECK-NODSP-V7-NEXT: push {r4, lr}
-; CHECK-NODSP-V7-NEXT: ldrh r1, [r1]
-; CHECK-NODSP-V7-NEXT: ldrd lr, r12, [sp, #8]
-; CHECK-NODSP-V7-NEXT: ldrh r2, [r2]
-; CHECK-NODSP-V7-NEXT: sxth r4, r1
-; CHECK-NODSP-V7-NEXT: and r1, r1, #1
-; CHECK-NODSP-V7-NEXT: str.w r4, [r12]
-; CHECK-NODSP-V7-NEXT: strb.w r1, [lr]
-; CHECK-NODSP-V7-NEXT: ldrb r0, [r0]
-; CHECK-NODSP-V7-NEXT: muls r0, r1, r0
-; CHECK-NODSP-V7-NEXT: uxtb r0, r0
-; CHECK-NODSP-V7-NEXT: str r0, [r3]
-; CHECK-NODSP-V7-NEXT: orrs r0, r2
-; CHECK-NODSP-V7-NEXT: uxth r0, r0
-; CHECK-NODSP-V7-NEXT: clz r0, r0
-; CHECK-NODSP-V7-NEXT: lsrs r0, r0, #5
-; CHECK-NODSP-V7-NEXT: pop {r4, pc}
-;
-; CHECK-DSP-LABEL: dont_replace_trunc_1:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: push {r7, lr}
-; CHECK-DSP-NEXT: ldrh r1, [r1]
-; CHECK-DSP-NEXT: ldrh.w r12, [r2]
-; CHECK-DSP-NEXT: ldr r2, [sp, #12]
-; CHECK-DSP-NEXT: sxth.w lr, r1
-; CHECK-DSP-NEXT: and r1, r1, #1
-; CHECK-DSP-NEXT: str.w lr, [r2]
-; CHECK-DSP-NEXT: ldr r2, [sp, #8]
-; CHECK-DSP-NEXT: strb r1, [r2]
-; CHECK-DSP-NEXT: ldrb r0, [r0]
-; CHECK-DSP-NEXT: muls r0, r1, r0
-; CHECK-DSP-NEXT: uxtb r0, r0
-; CHECK-DSP-NEXT: str r0, [r3]
-; CHECK-DSP-NEXT: orr.w r0, r0, r12
-; CHECK-DSP-NEXT: uxth r0, r0
-; CHECK-DSP-NEXT: clz r0, r0
-; CHECK-DSP-NEXT: lsrs r0, r0, #5
-; CHECK-DSP-NEXT: pop {r7, pc}
-;
-; CHECK-DSP-IMM-LABEL: dont_replace_trunc_1:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: push {r4, lr}
-; CHECK-DSP-IMM-NEXT: ldrd lr, r12, [sp, #8]
-; CHECK-DSP-IMM-NEXT: ldrh r1, [r1]
-; CHECK-DSP-IMM-NEXT: ldrh r2, [r2]
-; CHECK-DSP-IMM-NEXT: sxth r4, r1
-; CHECK-DSP-IMM-NEXT: str.w r4, [r12]
-; CHECK-DSP-IMM-NEXT: and r1, r1, #1
-; CHECK-DSP-IMM-NEXT: strb.w r1, [lr]
-; CHECK-DSP-IMM-NEXT: ldrb r0, [r0]
-; CHECK-DSP-IMM-NEXT: muls r0, r1, r0
-; CHECK-DSP-IMM-NEXT: uxtb r0, r0
-; CHECK-DSP-IMM-NEXT: str r0, [r3]
-; CHECK-DSP-IMM-NEXT: orrs r0, r2
-; CHECK-DSP-IMM-NEXT: uxth r0, r0
-; CHECK-DSP-IMM-NEXT: clz r0, r0
-; CHECK-DSP-IMM-NEXT: lsrs r0, r0, #5
-; CHECK-DSP-IMM-NEXT: pop {r4, pc}
-entry:
-  %0 = load i16, i16* %c, align 2
-  %1 = load i16, i16* %b, align 2
-  %conv = sext i16 %1 to i32
-  store i32 %conv, i32* %f, align 4
-  %2 = trunc i16 %1 to i8
-  %conv1 = and i8 %2, 1
-  store i8 %conv1, i8* %e, align 1
-  %3 = load i8, i8* %a, align 1
-  %narrow = mul nuw i8 %3, %conv1
-  %mul = zext i8 %narrow to i32
-  store i32 %mul, i32* %d, align 4
-  %4 = zext i8 %narrow to i16
-  %conv5 = or i16 %0, %4
-  %tobool = icmp eq i16 %conv5, 0
-  ret i1 %tobool
-}
-
-define i32 @dont_replace_trunc_2(i16* %a, i8* %b) {
-; CHECK-NODSP-V8-LABEL: dont_replace_trunc_2:
-; CHECK-NODSP-V8: @ %bb.0: @ %entry
-; CHECK-NODSP-V8-NEXT: ldrh r0, [r0]
-; CHECK-NODSP-V8-NEXT: cmp r0, #8
-; CHECK-NODSP-V8-NEXT: it ls
-; CHECK-NODSP-V8-NEXT: movls r0, #0
-; CHECK-NODSP-V8-NEXT: ldrb r2, [r1]
-; CHECK-NODSP-V8-NEXT: uxtb r0, r0
-; CHECK-NODSP-V8-NEXT: orrs r0, r2
-; CHECK-NODSP-V8-NEXT: strb r0, [r1]
-; CHECK-NODSP-V8-NEXT: bx lr
-;
-; CHECK-NODSP-V7-LABEL: dont_replace_trunc_2:
-; CHECK-NODSP-V7: @ %bb.0: @ %entry
-; CHECK-NODSP-V7-NEXT: ldrh r0, [r0]
-; CHECK-NODSP-V7-NEXT: ldrb r2, [r1]
-; CHECK-NODSP-V7-NEXT: cmp r0, #8
-; CHECK-NODSP-V7-NEXT: it ls
-; CHECK-NODSP-V7-NEXT: movls r0, #0
-; CHECK-NODSP-V7-NEXT: uxtb r0, r0
-; CHECK-NODSP-V7-NEXT: orrs r0, r2
-; CHECK-NODSP-V7-NEXT: strb r0, [r1]
-; CHECK-NODSP-V7-NEXT: bx lr
-;
-; CHECK-DSP-LABEL: dont_replace_trunc_2:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: ldrh r0, [r0]
-; CHECK-DSP-NEXT: cmp r0, #8
-; CHECK-DSP-NEXT: it ls
-; CHECK-DSP-NEXT: movls r0, #0
-; CHECK-DSP-NEXT: ldrb r2, [r1]
-; CHECK-DSP-NEXT: uxtb r0, r0
-; CHECK-DSP-NEXT: orrs r0, r2
-; CHECK-DSP-NEXT: strb r0, [r1]
-; CHECK-DSP-NEXT: bx lr
-;
-; CHECK-DSP-IMM-LABEL: dont_replace_trunc_2:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: ldrh r0, [r0]
-; CHECK-DSP-IMM-NEXT: movs r2, #0
-; CHECK-DSP-IMM-NEXT: ldrb r3, [r1]
-; CHECK-DSP-IMM-NEXT: cmp r0, #8
-; CHECK-DSP-IMM-NEXT: it hi
-; CHECK-DSP-IMM-NEXT: movhi r2, r0
-; CHECK-DSP-IMM-NEXT: uxtb r0, r2
-; CHECK-DSP-IMM-NEXT: orrs r0, r3
-; CHECK-DSP-IMM-NEXT: strb r0, [r1]
-; CHECK-DSP-IMM-NEXT: bx lr
-entry:
-  %0 = load i16, i16* %a, align 2
-  %cmp = icmp ugt i16 %0, 8
-  %narrow = select i1 %cmp, i16 %0, i16 0
-  %cond = trunc i16 %narrow to i8
-  %1 = load i8, i8* %b, align 1
-  %or = or i8 %1, %cond
-  store i8 %or, i8* %b, align 1
-  %conv5 = zext i8 %or to i32
-  ret i32 %conv5
-}
-
-define i32 @replace_trunk_with_mask(i16* %a) {
-; CHECK-NODSP-V8-LABEL: replace_trunk_with_mask:
-; CHECK-NODSP-V8: @ %bb.0: @ %entry
-; CHECK-NODSP-V8-NEXT: ldrh r0, [r0]
-; CHECK-NODSP-V8-NEXT: cmp r0, #0
-; CHECK-NODSP-V8-NEXT: itt eq
-; CHECK-NODSP-V8-NEXT: moveq r0, #0
-; CHECK-NODSP-V8-NEXT: bxeq lr
-; CHECK-NODSP-V8-NEXT: movw r1, #535
-; CHECK-NODSP-V8-NEXT: udiv r2, r1, r0
-; CHECK-NODSP-V8-NEXT: mls r0, r2, r0, r1
-; CHECK-NODSP-V8-NEXT: movw r1, #43691
-; CHECK-NODSP-V8-NEXT: uxtb r0, r0
-; CHECK-NODSP-V8-NEXT: movt r1, #43690
-; CHECK-NODSP-V8-NEXT: umull r0, r1, r0, r1
-; CHECK-NODSP-V8-NEXT: lsrs r0, r1, #1
-; CHECK-NODSP-V8-NEXT: bx lr
-;
-; CHECK-NODSP-V7-LABEL: replace_trunk_with_mask:
-; CHECK-NODSP-V7: @ %bb.0: @ %entry
-; CHECK-NODSP-V7-NEXT: .save {r7, lr}
-; CHECK-NODSP-V7-NEXT: push {r7, lr}
-; CHECK-NODSP-V7-NEXT: ldrh r1, [r0]
-; CHECK-NODSP-V7-NEXT: cbz r1, .LBB28_2
-; CHECK-NODSP-V7-NEXT: @ %bb.1: @ %cond.false
-; CHECK-NODSP-V7-NEXT: movw r0, #535
-; CHECK-NODSP-V7-NEXT: bl __aeabi_uidivmod
-; CHECK-NODSP-V7-NEXT: uxtb r0, r1
-; CHECK-NODSP-V7-NEXT: movw r1, #43691
-; CHECK-NODSP-V7-NEXT: movt r1, #43690
-; CHECK-NODSP-V7-NEXT: umull r0, r1, r0, r1
-; CHECK-NODSP-V7-NEXT: lsrs r0, r1, #1
-; CHECK-NODSP-V7-NEXT: pop {r7, pc}
-; CHECK-NODSP-V7-NEXT: .LBB28_2:
-; CHECK-NODSP-V7-NEXT: movs r0, #0
-; CHECK-NODSP-V7-NEXT: pop {r7, pc}
-;
-; CHECK-DSP-LABEL: replace_trunk_with_mask:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: ldrh r0, [r0]
-; CHECK-DSP-NEXT: cmp r0, #0
-; CHECK-DSP-NEXT: itt eq
-; CHECK-DSP-NEXT: moveq r0, #0
-; CHECK-DSP-NEXT: bxeq lr
-; CHECK-DSP-NEXT: movw r1, #535
-; CHECK-DSP-NEXT: udiv r2, r1, r0
-; CHECK-DSP-NEXT: mls r0, r2, r0, r1
-; CHECK-DSP-NEXT: movw r1, #43691
-; CHECK-DSP-NEXT: uxtb r0, r0
-; CHECK-DSP-NEXT: movt r1, #43690
-; CHECK-DSP-NEXT: umull r0, r1, r0, r1
-; CHECK-DSP-NEXT: lsrs r0, r1, #1
-; CHECK-DSP-NEXT: bx lr
-;
-; CHECK-DSP-IMM-LABEL: replace_trunk_with_mask:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: ldrh r0, [r0]
-; CHECK-DSP-IMM-NEXT: cbz r0, .LBB28_2
-; CHECK-DSP-IMM-NEXT: @ %bb.1: @ %cond.false
-; CHECK-DSP-IMM-NEXT: movw r1, #535
-; CHECK-DSP-IMM-NEXT: udiv r2, r1, r0
-; CHECK-DSP-IMM-NEXT: mls r0, r2, r0, r1
-; CHECK-DSP-IMM-NEXT: movw r1, #43691
-; CHECK-DSP-IMM-NEXT: movt r1, #43690
-; CHECK-DSP-IMM-NEXT: uxtb r0, r0
-; CHECK-DSP-IMM-NEXT: umull r0, r1, r0, r1
-; CHECK-DSP-IMM-NEXT: lsrs r0, r1, #1
-; CHECK-DSP-IMM-NEXT: bx lr
-; CHECK-DSP-IMM-NEXT: .LBB28_2:
-; CHECK-DSP-IMM-NEXT: movs r0, #0
-; CHECK-DSP-IMM-NEXT: bx lr
-entry:
-  %0 = load i16, i16* %a
-  %cmp = icmp eq i16 %0, 0
-  br i1 %cmp, label %cond.end, label %cond.false
-
-cond.false:
-  %1 = urem i16 535, %0
-  %.lhs.trunc = trunc i16 %1 to i8
-  %2 = udiv i8 %.lhs.trunc, 3
-  %phitmp = zext i8 %2 to i32
-  br label %cond.end
-
-cond.end:
-  %cond = phi i32 [ %phitmp, %cond.false ], [ 0, %entry ]
-  ret i32 %cond
-}
-
-define float @test_i8_sitofp(i8* %ptr, i8 %arg) {
-; CHECK-NODSP-V8-LABEL: test_i8_sitofp:
-; CHECK-NODSP-V8: @ %bb.0: @ %entry
-; CHECK-NODSP-V8-NEXT: ldrb r0, [r0]
-; CHECK-NODSP-V8-NEXT: uxtb r2, r1
-; CHECK-NODSP-V8-NEXT: cmp r0, r2
-; CHECK-NODSP-V8-NEXT: bne .LBB29_2
-; CHECK-NODSP-V8-NEXT: @ %bb.1:
-; CHECK-NODSP-V8-NEXT: vldr s0, .LCPI29_0
-; CHECK-NODSP-V8-NEXT: vmov r0, s0
-; CHECK-NODSP-V8-NEXT: bx lr
-; CHECK-NODSP-V8-NEXT: .LBB29_2: @ %if.end
-; CHECK-NODSP-V8-NEXT: sxtb r0, r1
-; CHECK-NODSP-V8-NEXT: vmov s0, r0
-; CHECK-NODSP-V8-NEXT: vcvt.f32.s32 s0, s0
-; CHECK-NODSP-V8-NEXT: vmov.f32 s2, #2.000000e+01
-; CHECK-NODSP-V8-NEXT: vdiv.f32 s0, s0, s2
-; CHECK-NODSP-V8-NEXT: vmov r0, s0
-; CHECK-NODSP-V8-NEXT: bx lr
-; CHECK-NODSP-V8-NEXT: .p2align 2
-; CHECK-NODSP-V8-NEXT: @ %bb.3:
-; CHECK-NODSP-V8-NEXT: .LCPI29_0:
-; CHECK-NODSP-V8-NEXT: .long 0 @ float 0
-;
-; CHECK-NODSP-V7-LABEL: test_i8_sitofp:
-; CHECK-NODSP-V7: @ %bb.0: @ %entry
-; CHECK-NODSP-V7-NEXT: ldrb r0, [r0]
-; CHECK-NODSP-V7-NEXT: uxtb r2, r1
-; CHECK-NODSP-V7-NEXT: cmp r0, r2
-; CHECK-NODSP-V7-NEXT: ittt eq
-; CHECK-NODSP-V7-NEXT: vldreq s0, .LCPI29_0
-; CHECK-NODSP-V7-NEXT: vmoveq r0, s0
-; CHECK-NODSP-V7-NEXT: bxeq lr
-; CHECK-NODSP-V7-NEXT: sxtb r0, r1
-; CHECK-NODSP-V7-NEXT: vmov.f32 s0, #2.000000e+01
-; CHECK-NODSP-V7-NEXT: vmov s2, r0
-; CHECK-NODSP-V7-NEXT: vcvt.f32.s32 s2, s2
-; CHECK-NODSP-V7-NEXT: vdiv.f32 s0, s2, s0
-; CHECK-NODSP-V7-NEXT: vmov r0, s0
-; CHECK-NODSP-V7-NEXT: bx lr
-; CHECK-NODSP-V7-NEXT: .p2align 2
-; CHECK-NODSP-V7-NEXT: @ %bb.1:
-; CHECK-NODSP-V7-NEXT: .LCPI29_0:
-; CHECK-NODSP-V7-NEXT: .long 0 @ float 0
-;
-; CHECK-DSP-LABEL: test_i8_sitofp:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: ldrb r0, [r0]
-; CHECK-DSP-NEXT: uxtb r2, r1
-; CHECK-DSP-NEXT: cmp r0, r2
-; CHECK-DSP-NEXT: ittt eq
-; CHECK-DSP-NEXT: vldreq s0, .LCPI29_0
-; CHECK-DSP-NEXT: vmoveq r0, s0
-; CHECK-DSP-NEXT: bxeq lr
-; CHECK-DSP-NEXT: sxtb r0, r1
-; CHECK-DSP-NEXT: vmov.f32 s0, #2.000000e+01
-; CHECK-DSP-NEXT: vmov s2, r0
-; CHECK-DSP-NEXT: vcvt.f32.s32 s2, s2
-; CHECK-DSP-NEXT: vdiv.f32 s0, s2, s0
-; CHECK-DSP-NEXT: vmov r0, s0
-; CHECK-DSP-NEXT: bx lr
-; CHECK-DSP-NEXT: .p2align 2
-; CHECK-DSP-NEXT: @ %bb.1:
-; CHECK-DSP-NEXT: .LCPI29_0:
-; CHECK-DSP-NEXT: .long 0 @ float 0
-;
-; CHECK-DSP-IMM-LABEL: test_i8_sitofp:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: ldrb r0, [r0]
-; CHECK-DSP-IMM-NEXT: uxtb r2, r1
-; CHECK-DSP-IMM-NEXT: cmp r0, r2
-; CHECK-DSP-IMM-NEXT: bne .LBB29_2
-; CHECK-DSP-IMM-NEXT: @ %bb.1:
-; CHECK-DSP-IMM-NEXT: vldr s0, .LCPI29_0
-; CHECK-DSP-IMM-NEXT: vmov r0, s0
-; CHECK-DSP-IMM-NEXT: bx lr
-; CHECK-DSP-IMM-NEXT: .LBB29_2: @ %if.end
-; CHECK-DSP-IMM-NEXT: sxtb r0, r1
-; CHECK-DSP-IMM-NEXT: vmov.f32 s0, #2.000000e+01
-; CHECK-DSP-IMM-NEXT: vmov s2, r0
-; CHECK-DSP-IMM-NEXT: vcvt.f32.s32 s2, s2
-; CHECK-DSP-IMM-NEXT: vdiv.f32 s0, s2, s0
-; CHECK-DSP-IMM-NEXT: vmov r0, s0
-; CHECK-DSP-IMM-NEXT: bx lr
-; CHECK-DSP-IMM-NEXT: .p2align 2
-; CHECK-DSP-IMM-NEXT: @ %bb.3:
-; CHECK-DSP-IMM-NEXT: .LCPI29_0:
-; CHECK-DSP-IMM-NEXT: .long 0 @ float 0
-entry:
-  %0 = load i8, i8* %ptr, align 1
-  %cmp = icmp eq i8 %0, %arg
-  br i1 %cmp, label %exit, label %if.end
-
-if.end:
-  %conv = sitofp i8 %arg to float
-  %div = fdiv float %conv, 2.000000e+01
-  br label %exit
-
-exit:
-  %res = phi float [ 0.0, %entry ], [ %div, %if.end ]
-  ret float %res
-}
-
-define float @test_i16_sitofp(i16* %ptr, i16 %arg) {
-; CHECK-NODSP-V8-LABEL: test_i16_sitofp:
-; CHECK-NODSP-V8: @ %bb.0: @ %entry
-; CHECK-NODSP-V8-NEXT: ldrh r0, [r0]
-; CHECK-NODSP-V8-NEXT: uxth r2, r1
-; CHECK-NODSP-V8-NEXT: cmp r0, r2
-; CHECK-NODSP-V8-NEXT: bne .LBB30_2
-; CHECK-NODSP-V8-NEXT: @ %bb.1:
-; CHECK-NODSP-V8-NEXT: vldr s0, .LCPI30_0
-; CHECK-NODSP-V8-NEXT: vmov r0, s0
-; CHECK-NODSP-V8-NEXT: bx lr
-; CHECK-NODSP-V8-NEXT: .LBB30_2: @ %if.end
-; CHECK-NODSP-V8-NEXT: sxth r0, r1
-; CHECK-NODSP-V8-NEXT: vmov s0, r0
-; CHECK-NODSP-V8-NEXT: vcvt.f32.s32 s0, s0
-; CHECK-NODSP-V8-NEXT: vmov.f32 s2, #2.000000e+01
-; CHECK-NODSP-V8-NEXT: vdiv.f32 s0, s0, s2
-; CHECK-NODSP-V8-NEXT: vmov r0, s0
-; CHECK-NODSP-V8-NEXT: bx lr
-; CHECK-NODSP-V8-NEXT: .p2align 2
-; CHECK-NODSP-V8-NEXT: @ %bb.3:
-; CHECK-NODSP-V8-NEXT: .LCPI30_0:
-; CHECK-NODSP-V8-NEXT: .long 0 @ float 0
-;
-; CHECK-NODSP-V7-LABEL: test_i16_sitofp:
-; CHECK-NODSP-V7: @ %bb.0: @ %entry
-; CHECK-NODSP-V7-NEXT: ldrh r0, [r0]
-; CHECK-NODSP-V7-NEXT: uxth r2, r1
-; CHECK-NODSP-V7-NEXT: cmp r0, r2
-; CHECK-NODSP-V7-NEXT: ittt eq
-; CHECK-NODSP-V7-NEXT: vldreq s0, .LCPI30_0
-; CHECK-NODSP-V7-NEXT: vmoveq r0, s0
-; CHECK-NODSP-V7-NEXT: bxeq lr
-; CHECK-NODSP-V7-NEXT: sxth r0, r1
-; CHECK-NODSP-V7-NEXT: vmov.f32 s0, #2.000000e+01
-; CHECK-NODSP-V7-NEXT: vmov s2, r0
-; CHECK-NODSP-V7-NEXT: vcvt.f32.s32 s2, s2
-; CHECK-NODSP-V7-NEXT: vdiv.f32 s0, s2, s0
-; CHECK-NODSP-V7-NEXT: vmov r0, s0
-; CHECK-NODSP-V7-NEXT: bx lr
-; CHECK-NODSP-V7-NEXT: .p2align 2
-; CHECK-NODSP-V7-NEXT: @ %bb.1:
-; CHECK-NODSP-V7-NEXT: .LCPI30_0:
-; CHECK-NODSP-V7-NEXT: .long 0 @ float 0
-;
-; CHECK-DSP-LABEL: test_i16_sitofp:
-; CHECK-DSP: @ %bb.0: @ %entry
-; CHECK-DSP-NEXT: ldrh r0, [r0]
-; CHECK-DSP-NEXT: uxth r2, r1
-; CHECK-DSP-NEXT: cmp r0, r2
-; CHECK-DSP-NEXT: ittt eq
-; CHECK-DSP-NEXT: vldreq s0, .LCPI30_0
-; CHECK-DSP-NEXT: vmoveq r0, s0
-; CHECK-DSP-NEXT: bxeq lr
-; CHECK-DSP-NEXT: sxth r0, r1
-; CHECK-DSP-NEXT: vmov.f32 s0, #2.000000e+01
-; CHECK-DSP-NEXT: vmov s2, r0
-; CHECK-DSP-NEXT: vcvt.f32.s32 s2, s2
-; CHECK-DSP-NEXT: vdiv.f32 s0, s2, s0
-; CHECK-DSP-NEXT: vmov r0, s0
-; CHECK-DSP-NEXT: bx lr
-; CHECK-DSP-NEXT: .p2align 2
-; CHECK-DSP-NEXT: @ %bb.1:
-; CHECK-DSP-NEXT: .LCPI30_0:
-; CHECK-DSP-NEXT: .long 0 @ float 0
-;
-; CHECK-DSP-IMM-LABEL: test_i16_sitofp:
-; CHECK-DSP-IMM: @ %bb.0: @ %entry
-; CHECK-DSP-IMM-NEXT: ldrh r0, [r0]
-; CHECK-DSP-IMM-NEXT: uxth r2, r1
-; CHECK-DSP-IMM-NEXT: cmp r0, r2
-; CHECK-DSP-IMM-NEXT: bne .LBB30_2
-; CHECK-DSP-IMM-NEXT: @ %bb.1:
-; CHECK-DSP-IMM-NEXT: vldr s0, .LCPI30_0
-; CHECK-DSP-IMM-NEXT: vmov r0, s0
-; CHECK-DSP-IMM-NEXT: bx lr
-; CHECK-DSP-IMM-NEXT: .LBB30_2: @ %if.end
-; CHECK-DSP-IMM-NEXT: sxth r0, r1
-; CHECK-DSP-IMM-NEXT: vmov.f32 s0, #2.000000e+01
-; CHECK-DSP-IMM-NEXT: vmov s2, r0
-; CHECK-DSP-IMM-NEXT: vcvt.f32.s32 s2, s2
-; CHECK-DSP-IMM-NEXT: vdiv.f32 s0, s2, s0
-; CHECK-DSP-IMM-NEXT: vmov r0, s0
-; CHECK-DSP-IMM-NEXT: bx lr
-; CHECK-DSP-IMM-NEXT: .p2align 2
-; CHECK-DSP-IMM-NEXT: @ %bb.3:
-; CHECK-DSP-IMM-NEXT: .LCPI30_0:
-; CHECK-DSP-IMM-NEXT: .long 0 @ float 0
-entry:
-  %0 = load i16, i16* %ptr, align 1
-  %cmp = icmp eq i16 %0, %arg
-  br i1 %cmp, label %exit, label %if.end
-
-if.end:
-  %conv = sitofp i16 %arg to float
-  %div = fdiv float %conv, 2.000000e+01
-  br label %exit
-
-exit:
-  %res = phi float [ 0.0, %entry ], [ %div, %if.end ]
-  ret float %res
-}
diff --git a/llvm/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll b/llvm/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll
deleted file mode 100644
index 76c9746c35566..0000000000000
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll
+++ /dev/null
@@ -1,332 +0,0 @@
-; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
-; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP
-; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM
-
-; CHECK-COMMON-LABEL: test_ult_254_inc_imm:
-; CHECK-DSP: adds r0, #1
-; CHECK-DSP-NEXT: uxtb r1, r0
-; CHECK-DSP-NEXT: movs r0, #47
-; CHECK-DSP-NEXT: cmp r1, #254
-; CHECK-DSP-NEXT: it lo
-; CHECK-DSP-NEXT: movlo r0, #35
-
-; CHECK-DSP-IMM: movs r1, #1
-; CHECK-DSP-IMM-NEXT: uadd8 r1, r0, r1
-; CHECK-DSP-IMM-NEXT: movs r0, #47
-; CHECK-DSP-IMM-NEXT: cmp r1, #254
-; CHECK-DSP-IMM-NEXT: it lo
-; CHECK-DSP-IMM-NEXT: movlo r0, #35
-define i32 @test_ult_254_inc_imm(i8 zeroext %x) {
-entry:
-  %add = add i8 %x, 1
-  %cmp = icmp ult i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_slt_254_inc_imm
-; CHECK-COMMON: adds
-; CHECK-COMMON: sxtb
-define i32 @test_slt_254_inc_imm(i8 signext %x) {
-entry:
-  %add = add i8 %x, 1
-  %cmp = icmp slt i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_ult_254_inc_var:
-; CHECK-NODSP: add r0, r1
-; CHECK-NODSP-NEXT: uxtb r1, r0
-; CHECK-NODSP-NEXT: movs r0, #47
-; CHECK-NODSP-NEXT: cmp r1, #254
-; CHECK-NODSP-NEXT: it lo
-; CHECK-NODSP-NEXT: movlo r0, #35
-
-; CHECK-DSP: uadd8 r1, r0, r1
-; CHECK-DSP-NEXT: movs r0, #47
-; CHECK-DSP-NEXT: cmp r1, #254
-; CHECK-DSP-NEXT: it lo
-; CHECK-DSP-NEXT: movlo r0, #35
-define i32 @test_ult_254_inc_var(i8 zeroext %x, i8 zeroext %y) {
-entry:
-  %add = add i8 %x, %y
-  %cmp = icmp ult i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_sle_254_inc_var
-; CHECK-COMMON: add
-; CHECK-COMMON: sxtb
-; CHECK-COMMON: cmp
-define i32 @test_sle_254_inc_var(i8 %x, i8 %y) {
-entry:
-  %add = add i8 %x, %y
-  %cmp = icmp sle i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_ugt_1_dec_imm:
-; CHECK-COMMON: subs r1, r0, #1
-; CHECK-COMMON-NEXT: movs r0, #47
-; CHECK-COMMON-NEXT: cmp r1, #1
-; CHECK-COMMON-NEXT: it hi
-; CHECK-COMMON-NEXT: movhi r0, #35
-define i32 @test_ugt_1_dec_imm(i8 zeroext %x) {
-entry:
-  %add = add i8 %x, -1
-  %cmp = icmp ugt i8 %add, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_sgt_1_dec_imm
-; CHECK-COMMON: subs
-; CHECK-COMMON: sxtb
-; CHECK-COMMON: cmp
-define i32 @test_sgt_1_dec_imm(i8 %x) {
-entry:
-  %add = add i8 %x, -1
-  %cmp = icmp sgt i8 %add, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_ugt_1_dec_var:
-; CHECK-NODSP: subs r0, r0, r1
-; CHECK-NODSP-NEXT: uxtb r1, r0
-; CHECK-NODSP-NEXT: movs r0, #47
-; CHECK-NODSP-NEXT: cmp r1, #1
-; CHECK-NODSP-NEXT: it hi
-; CHECK-NODSP-NEXT: movhi r0, #35
-
-; CHECK-DSP: usub8 r1, r0, r1
-; CHECK-DSP-NEXT: movs r0, #47
-; CHECK-DSP-NEXT: cmp r1, #1
-; CHECK-DSP-NEXT: it hi
-; CHECK-DSP-NEXT: movhi r0, #35
-define i32 @test_ugt_1_dec_var(i8 zeroext %x, i8 zeroext %y) {
-entry:
-  %sub = sub i8 %x, %y
-  %cmp = icmp ugt i8 %sub, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_sge_1_dec_var
-; CHECK-COMMON: sub
-; CHECK-COMMON: sxtb
-; CHECK-COMMON: cmp
-define i32 @test_sge_1_dec_var(i8 %x, i8 %y) {
-entry:
-  %sub = sub i8 %x, %y
-  %cmp = icmp sge i8 %sub, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: dsp_imm1:
-; CHECK-DSP: eors r1, r0
-; CHECK-DSP-NEXT: and r0, r0, #7
-; CHECK-DSP-NEXT: subs r0, r0, r1
-; CHECK-DSP-NEXT: adds r0, #1
-; CHECK-DSP-NEXT: uxtb r1, r0
-; CHECK-DSP-NEXT: movs r0, #47
-; CHECK-DSP-NEXT: cmp r1, #254
-; CHECK-DSP-NEXT: it lo
-; CHECK-DSP-NEXT: movlo r0, #35
-
-; CHECK-DSP-IMM: eors r1, r0
-; CHECK-DSP-IMM-NEXT: and r0, r0, #7
-; CHECK-DSP-IMM-NEXT: usub8 r0, r0, r1
-; CHECK-DSP-IMM-NEXT: movs r1, #1
-; CHECK-DSP-IMM-NEXT: uadd8 r1, r0, r1
-; CHECK-DSP-IMM-NEXT: movs r0, #47
-; CHECK-DSP-IMM-NEXT: cmp r1, #254
-; CHECK-DSP-IMM-NEXT: it lo
-; CHECK-DSP-IMM-NEXT: movlo r0, #35
-define i32 @dsp_imm1(i8 zeroext %x, i8 zeroext %y) {
-entry:
-  %xor = xor i8 %x, %y
-  %and = and i8 %x, 7
-  %sub = sub i8 %and, %xor
-  %add = add i8 %sub, 1
-  %cmp = icmp ult i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: dsp_var:
-; CHECK-COMMON: eors r1, r0
-; CHECK-COMMON: and r2, r0, #7
-; CHECK-NODSP: subs r1, r2, r1
-; CHECK-NODSP: add.w r0, r1, r0, lsl #1
-; CHECK-NODSP: uxtb r1, r0
-; CHECK-DSP: usub8 r1, r2, r1
-; CHECK-DSP: lsls r0, r0, #1
-; CHECK-DSP: uadd8 r1, r1, r0
-; CHECK-DSP-NOT: uxt
-; CHECK-COMMON: movs r0, #47
-; CHECK-COMMON: cmp r1, #254
-; CHECK-COMMON: it lo
-; CHECK-COMMON: movlo r0, #35
-define i32 @dsp_var(i8 zeroext %x, i8 zeroext %y) {
-  %xor = xor i8 %x, %y
-  %and = and i8 %x, 7
-  %sub = sub i8 %and, %xor
-  %mul = shl nuw i8 %x, 1
-  %add = add i8 %sub, %mul
-  %cmp = icmp ult i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: store_dsp_res
-; CHECK-DSP: usub8
-; CHECK-DSP: strb
-define void @store_dsp_res(i8* %in, i8* %out, i8 %compare) {
-  %first = getelementptr inbounds i8, i8* %in, i32 0
-  %second = getelementptr inbounds i8, i8* %in, i32 1
-  %ld0 = load i8, i8* %first
-  %ld1 = load i8, i8* %second
-  %xor = xor i8 %ld0, -1
-  %cmp = icmp ult i8 %compare, %ld1
-  %select = select i1 %cmp, i8 %compare, i8 %xor
-  %sub = sub i8 %ld0, %select
-  store i8 %sub, i8* %out, align 1
-  ret void
-}
-
-; CHECK-COMMON-LABEL: ugt_1_dec_imm:
-; CHECK-COMMON: subs r1, r0, #1
-; CHECK-COMMON-NEXT: movs r0, #47
-; CHECK-COMMON-NEXT: cmp r1, #1
-; CHECK-COMMON-NEXT: it hi
-; CHECK-COMMON-NEXT: movhi r0, #35
-define i32 @ugt_1_dec_imm(i8 zeroext %x) {
-entry:
-  %add = add i8 %x, -1
-  %cmp = icmp ugt i8 %add, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: ugt_1_dec_var:
-; CHECK-NODSP: subs r0, r0, r1
-; CHECK-NODSP-NEXT: uxtb r1, r0
-; CHECK-NODSP-NEXT: movs r0, #47
-; CHECK-NODSP-NEXT: cmp r1, #1
-; CHECK-NODSP-NEXT: it hi
-; CHECK-NODSP-NEXT: movhi r0, #35
-
-; CHECK-DSP: usub8 r1, r0, r1
-; CHECK-DSP-NEXT: movs r0, #47
-; CHECK-DSP-NEXT: cmp r1, #1
-; CHECK-DSP-NEXT: it hi
-; CHECK-DSP-NEXT: movhi r0, #35
-define i32 @ugt_1_dec_var(i8 zeroext %x, i8 zeroext %y) {
-entry:
-  %sub = sub i8 %x, %y
-  %cmp = icmp ugt i8 %sub, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: icmp_eq_minus_one
-; CHECK-COMMON: cmp {{r[0-9]+}}, #255
-define i32 @icmp_eq_minus_one(i8* %ptr) {
-  %load = load i8, i8* %ptr, align 1
-  %conv = zext i8 %load to i32
-  %cmp = icmp eq i8 %load, -1
-  %ret = select i1 %cmp, i32 %conv, i32 -1
-  ret i32 %ret
-}
-
-; CHECK-COMMON-LABEL: icmp_not
-; CHECK-COMMON: movw r2, #65535
-; CHECK-COMMON: eors r2, r0
-; CHECK-COMMON: movs r0, #32
-; CHECK-COMMON: cmp r2, r1
-define i32 @icmp_not(i16 zeroext %arg0, i16 zeroext %arg1) {
-  %not = xor i16 %arg0, -1
-  %cmp = icmp eq i16 %not, %arg1
-  %res = select i1 %cmp, i32 16, i32 32
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: icmp_i1
-; CHECK-NOT: uxt
-define i32 @icmp_i1(i1* %arg0, i1 zeroext %arg1, i32 %a, i32 %b) {
-entry:
-  %load = load i1, i1* %arg0
-  %not = xor i1 %load, 1
-  %cmp = icmp eq i1 %arg1, %not
-  %res = select i1 %cmp, i32 %a, i32 %b
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: icmp_i7
-; CHECK-COMMON: ldrb
-; CHECK-COMMON: cmp
-define i32 @icmp_i7(i7* %arg0, i7 zeroext %arg1, i32 %a, i32 %b) {
-entry:
-  %load = load i7, i7* %arg0
-  %add = add nuw i7 %load, 1
-  %cmp = icmp ult i7 %arg1, %add
-  %res = select i1 %cmp, i32 %a, i32 %b
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: icmp_i15
-; CHECK-COMMON: movw [[MINUS_ONE:r[0-9]+]], #32767
-define i32 @icmp_i15(i15 zeroext %arg0, i15 zeroext %arg1) {
-  %xor = xor i15 %arg0, -1
-  %cmp = icmp eq i15 %xor, %arg1
-  %res = select i1 %cmp, i32 21, i32 42
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: icmp_minus_imm
-; CHECK-NODSP: subs [[SUB:r[0-9]+]],
-; CHECK-NODSP: uxtb [[UXT:r[0-9]+]],
-; CHECK-NODSP: cmp [[UXT]], #251
-
-; CHECK-DSP: subs [[SUB:r[0-9]+]],
-; CHECK-DSP: uxtb [[UXT:r[0-9]+]],
-; CHECK-DSP: cmp [[UXT]], #251
-
-; CHECK-DSP-IMM: ldrb [[A:r[0-9]+]],
-; CHECK-DSP-IMM: movs [[MINUS_7:r[0-9]+]], #249
-; CHECK-DSP-IMM: uadd8 [[RES:r[0-9]+]], [[A]], [[MINUS_7]]
-; CHECK-DSP-IMM: cmp [[RES]], #251
-define i32 @icmp_minus_imm(i8* %a) {
-entry:
-  %0 = load i8, i8* %a, align 1
-  %add.i = add i8 %0, -7
-  %cmp = icmp ugt i8 %add.i, -5
-  %conv1 = zext i1 %cmp to i32
-  ret i32 %conv1
-}
-
-; CHECK-COMMON-LABEL: mul_with_neg_imm
-; CHECK-COMMON-NOT: uxtb
-; CHECK-COMMON: and [[BIT0:r[0-9]+]], r0, #1
-; CHECK-COMMON: add.w [[MUL32:r[0-9]+]], [[BIT0]], [[BIT0]], lsl #5
-; CHECK-COMMON: cmp.w r0, [[MUL32]], lsl #2
-define void @mul_with_neg_imm(i32, i32* %b) {
-entry:
-  %1 = trunc i32 %0 to i8
-  %2 = and i8 %1, 1
-  %conv.i = mul nuw i8 %2, -124
-  %tobool = icmp eq i8 %conv.i, 0
-  br i1 %tobool, label %if.end, label %if.then
-
-if.then:
-  store i32 0, i32* %b, align 4
-  br label %if.end
-
-if.end:
-  ret void
-}
diff --git a/llvm/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll b/llvm/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll
deleted file mode 100644
index c446ddbdd07a7..0000000000000
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll
+++ /dev/null
@@ -1,279 +0,0 @@
-; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -mattr=-use-misched %s -arm-disable-cgp=false -o - | FileCheck %s
-
-; CHECK: overflow_add
-; CHECK: add
-; CHECK: uxth
-; CHECK: cmp
-define zeroext i16 @overflow_add(i16 zeroext %a, i16 zeroext %b) {
-  %add = add i16 %a, %b
-  %or = or i16 %add, 1
-  %cmp = icmp ugt i16 %or, 1024
-  %res = select i1 %cmp, i16 2, i16 5
-  ret i16 %res
-}
-
-; CHECK-LABEL: overflow_sub
-; CHECK: sub
-; CHECK: uxth
-; CHECK: cmp
-define zeroext i16 @overflow_sub(i16 zeroext %a, i16 zeroext %b) {
-  %add = sub i16 %a, %b
-  %or = or i16 %add, 1
-  %cmp = icmp ugt i16 %or, 1024
-  %res = select i1 %cmp, i16 2, i16 5
-  ret i16 %res
-}
-
-; CHECK-LABEL: overflow_mul
-; CHECK: mul
-; CHECK: uxth
-; CHECK: cmp
-define zeroext i16 @overflow_mul(i16 zeroext %a, i16 zeroext %b) {
-  %add = mul i16 %a, %b
-  %or = or i16 %add, 1
-  %cmp = icmp ugt i16 %or, 1024
-  %res = select i1 %cmp, i16 2, i16 5
-  ret i16 %res
-}
-
-; CHECK-LABEL: overflow_shl
-; CHECK-COMMON: lsl
-; CHECK-COMMON: uxth
-; CHECK-COMMON: cmp
-define zeroext i16 @overflow_shl(i16 zeroext %a, i16 zeroext %b) {
-  %add = shl i16 %a, %b
-  %or = or i16 %add, 1
-  %cmp = icmp ugt i16 %or, 1024
-  %res = select i1 %cmp, i16 2, i16 5
-  ret i16 %res
-}
-
-; CHECK-LABEL: overflow_add_no_consts:
-; CHECK: add r0, r1
-; CHECK: uxtb [[EXT:r[0-9]+]], r0
-; CHECK: cmp [[EXT]], r2
-; CHECK: movhi r0, #8
-define i32 @overflow_add_no_consts(i8 zeroext %a, i8 zeroext %b, i8 zeroext %limit) {
-  %add = add i8 %a, %b
-  %cmp = icmp ugt i8 %add, %limit
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: overflow_add_const_limit:
-; CHECK: add r0, r1
-; CHECK: uxtb [[EXT:r[0-9]+]], r0
-; CHECK: cmp [[EXT]], #128
-; CHECK: movhi r0, #8
-define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) {
-  %add = add i8 %a, %b
-  %cmp = icmp ugt i8 %add, 128
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: overflow_add_positive_const_limit:
-; CHECK: adds r0, #1
-; CHECK: uxtb [[EXT:r[0-9]+]], r0
-; CHECK: cmp [[EXT]], #128
-; CHECK: movhi r0, #8
-define i32 @overflow_add_positive_const_limit(i8 zeroext %a) {
-  %add = add i8 %a, 1
-  %cmp = icmp ugt i8 %add, 128
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: unsafe_add_underflow:
-; CHECK: movs r1, #16
-; CHECK: cmp r0, #1
-; CHECK: it eq
-; CHECK: moveq r1, #8
-; CHECK: mov r0, r1
-define i32 @unsafe_add_underflow(i8 zeroext %a) {
-  %add = add i8 %a, -2
-  %cmp = icmp ugt i8 %add, 254
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: safe_add_underflow:
-; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #1
-; CHECK-NOT: uxtb
-; CHECK: cmp [[MINUS_1]], #254
-; CHECK: movhi r0, #8
-define i32 @safe_add_underflow(i8 zeroext %a) {
-  %add = add i8 %a, -1
-  %cmp = icmp ugt i8 %add, 254
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: safe_add_underflow_neg:
-; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #2
-; CHECK-NOT: uxtb
-; CHECK: cmp [[MINUS_1]], #251
-; CHECK: movlo r0, #8
-define i32 @safe_add_underflow_neg(i8 zeroext %a) {
-  %add = add i8 %a, -2
-  %cmp = icmp ule i8 %add, -6
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: overflow_sub_negative_const_limit:
-; CHECK: adds r0, #1
-; CHECK: uxtb [[EXT:r[0-9]+]], r0
-; CHECK: cmp [[EXT]], #128
-; CHECK: movhi r0, #8
-define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) {
-  %sub = sub i8 %a, -1
-  %cmp = icmp ugt i8 %sub, 128
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: unsafe_sub_underflow:
-; CHECK: subs r0, #6
-; CHECK: uxtb [[EXT:r[0-9]+]], r0
-; CHECK: cmp [[EXT]], #250
-; CHECK: movhi r0, #8
-define i32 @unsafe_sub_underflow(i8 zeroext %a) {
-  %sub = sub i8 %a, 6
-  %cmp = icmp ugt i8 %sub, 250
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: safe_sub_underflow:
-; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #1
-; CHECK-NOT: uxtb
-; CHECK: cmp [[MINUS_1]], #255
-; CHECK: movlo r0, #8
-define i32 @safe_sub_underflow(i8 zeroext %a) {
-  %sub = sub i8 %a, 1
-  %cmp = icmp ule i8 %sub, 254
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: safe_sub_underflow_neg
-; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #4
-; CHECK-NOT: uxtb
-; CHECK: cmp [[MINUS_1]], #250
-; CHECK: movhi r0, #8
-define i32 @safe_sub_underflow_neg(i8 zeroext %a) {
-  %sub = sub i8 %a, 4
-  %cmp = icmp uge i8 %sub, -5
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: unsafe_sub_underflow_neg
-; CHECK: subs r0, #4
-; CHECK: uxtb [[EXT:r[0-9]+]], r0
-; CHECK: cmp [[EXT]], #253
-; CHECK: movlo r0, #8
-define i32 @unsafe_sub_underflow_neg(i8 zeroext %a) {
-  %sub = sub i8 %a, 4
-  %cmp = icmp ult i8 %sub, -3
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK: rsb.w [[RSUB:r[0-9]+]], r0, #248
-; CHECK-NOT: uxt
-; CHECK: cmp [[RSUB]], #252
-define i32 @safe_sub_imm_var(i8* %b) {
-entry:
-  %0 = load i8, i8* %b, align 1
-  %sub = sub nuw nsw i8 -8, %0
-  %cmp = icmp ugt i8 %sub, 252
-  %conv4 = zext i1 %cmp to i32
-  ret i32 %conv4
-}
-
-; CHECK-LABEL: safe_sub_var_imm
-; CHECK: sub.w [[ADD:r[0-9]+]], r0, #248
-; CHECK-NOT: uxt
-; CHECK: cmp [[ADD]], #252
-define i32 @safe_sub_var_imm(i8* %b) {
-entry:
-  %0 = load i8, i8* %b, align 1
-  %sub = sub nuw nsw i8 %0, -8
-  %cmp = icmp ugt i8 %sub, 252
-  %conv4 = zext i1 %cmp to i32
-  ret i32 %conv4
-}
-
-; CHECK-LABEL: safe_add_imm_var
-; CHECK: add.w [[ADD:r[0-9]+]], r0, #129
-; CHECK-NOT: uxt
-; CHECK: cmp [[ADD]], #127
-define i32 @safe_add_imm_var(i8* %b) {
-entry:
-  %0 = load i8, i8* %b, align 1
-  %add = add nuw nsw i8 -127, %0
-  %cmp = icmp ugt i8 %add, 127
-  %conv4 = zext i1 %cmp to i32
-  ret i32 %conv4
-}
-
-; CHECK-LABEL: safe_add_var_imm
-; CHECK: add.w [[SUB:r[0-9]+]], r0, #129
-; CHECK-NOT: uxt
-; CHECK: cmp [[SUB]], #127
-define i32 @safe_add_var_imm(i8* %b) {
-entry:
-  %0 = load i8, i8* %b, align 1
-  %add = add nuw nsw i8 %0, -127
-  %cmp = icmp ugt i8 %add, 127
-  %conv4 = zext i1 %cmp to i32
-  ret i32 %conv4
-}
-
-; CHECK-LABEL: convert_add_order
-; CHECK: orr{{.*}}, #1
-; CHECK: sub{{.*}}, #40
-; CHECK-NOT: uxt
-define i8 @convert_add_order(i8 zeroext %arg) {
-  %mask.0 = and i8 %arg, 1
-  %mask.1 = and i8 %arg, 2
-  %shl = or i8 %arg, 1
-  %add = add nuw i8 %shl, 10
-  %cmp.0 = icmp ult i8 %add, 60
-  %sub = add nsw i8 %shl, -40
-  %cmp.1 = icmp ult i8 %sub, 20
-  %mask.sel = select i1 %cmp.1, i8 %mask.0, i8 %mask.1
-  %res = select i1 %cmp.0, i8 %mask.sel, i8 %arg
-  ret i8 %res
-}
-
-; CHECK-LABEL: underflow_if_sub
-; CHECK: add{{.}} [[ADD:r[0-9]+]], #245
-; CHECK: cmp [[ADD]], r1
-define i8 @underflow_if_sub(i32 %arg, i8 zeroext %arg1) {
-  %cmp = icmp sgt i32 %arg, 0
-  %conv = zext i1 %cmp to i32
-  %and = and i32 %arg, %conv
-  %trunc = trunc i32 %and to i8
-  %conv1 = add nuw nsw i8 %trunc, -11
-  %cmp.1 = icmp ult i8 %conv1, %arg1
-  %res = select i1 %cmp.1, i8 %conv1, i8 100
-  ret i8 %res
-}
-
-; CHECK-LABEL: underflow_if_sub_signext
-; CHECK: cmp r0, #0
-; CHECK-NEXT: uxtb r1, r1
-; CHECK-NOT: xtb
-define i8 @underflow_if_sub_signext(i32 %arg, i8 signext %arg1) {
-  %cmp = icmp sgt i32 %arg, 0
-  %conv = zext i1 %cmp to i32
-  %and = and i32 %arg, %conv
-  %trunc = trunc i32 %and to i8
-  %conv1 = add nuw nsw i8 %trunc, -11
-  %cmp.1 = icmp ugt i8 %arg1, %conv1
-  %res = select i1 %cmp.1, i8 %conv1, i8 100
-  ret i8 %res
-}
diff --git a/llvm/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll b/llvm/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll
deleted file mode 100644
index 9b07a80e9a1c1..0000000000000
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll
+++ /dev/null
@@ -1,218 +0,0 @@
-; RUN: llc -mtriple=thumbv7m -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON
-; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON
-; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false -arm-enable-scalar-dsp=true -mcpu=cortex-m33 %s -o - | FileCheck %s --check-prefix=CHECK-COMMON
-; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON
-
-; Test that ARMCodeGenPrepare can handle:
-; - loops
-; - call operands
-; - call return values
-; - ret instructions
-; We use nuw on the arithmetic instructions to avoid complications.
-
-; Check that the arguments are extended but then nothing else is.
-; This also ensures that the pass can handle loops.
-; CHECK-COMMON-LABEL: phi_feeding_phi_args
-; CHECK-COMMON: uxtb
-; CHECK-COMMON: uxtb
-; CHECK-NOT: uxtb
-define void @phi_feeding_phi_args(i8 %a, i8 %b) {
-entry:
-  %0 = icmp ugt i8 %a, %b
-  br i1 %0, label %preheader, label %empty
-
-empty:
-  br label %preheader
-
-preheader:
-  %1 = phi i8 [ %a, %entry ], [ %b, %empty ]
-  br label %loop
-
-loop:
-  %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]
-  %cmp = icmp ult i8 %val, 254
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %inc = sub nuw i8 %val, 2
-  br label %if.end
-
-if.else:
-  %inc1 = shl nuw i8 %val, 1
-  br label %if.end
-
-if.end:
-  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
-  %cmp1 = icmp eq i8 %inc2, 255
-  br i1 %cmp1, label %exit, label %loop
-
-exit:
-  ret void
-}
-
-; Same as above, but as the args are zeroext, we shouldn't see any uxts.
-; CHECK-COMMON-LABEL: phi_feeding_phi_zeroext_args
-; CHECK-COMMON-NOT: uxt
-define void @phi_feeding_phi_zeroext_args(i8 zeroext %a, i8 zeroext %b) {
-entry:
-  %0 = icmp ugt i8 %a, %b
-  br i1 %0, label %preheader, label %empty
-
-empty:
-  br label %preheader
-
-preheader:
-  %1 = phi i8 [ %a, %entry ], [ %b, %empty ]
-  br label %loop
-
-loop:
-  %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]
-  %cmp = icmp ult i8 %val, 254
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %inc = sub nuw i8 %val, 2
-  br label %if.end
-
-if.else:
-  %inc1 = shl nuw i8 %val, 1
-  br label %if.end
-
-if.end:
-  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
-  %cmp1 = icmp eq i8 %inc2, 255
-  br i1 %cmp1, label %exit, label %loop
-
-exit:
-  ret void
-}
-
-; Just check that phis also work with i16s.
-; CHECK-COMMON-LABEL: phi_i16:
-; CHECK-COMMON-NOT: uxt
-define void @phi_i16() {
-entry:
-  br label %loop
-
-loop:
-  %val = phi i16 [ 0, %entry ], [ %inc2, %if.end ]
-  %cmp = icmp ult i16 %val, 128
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %inc = add nuw i16 %val, 2
-  br label %if.end
-
-if.else:
-  %inc1 = add nuw i16 %val, 1
-  br label %if.end
-
-if.end:
-  %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ]
-  %cmp1 = icmp ult i16 %inc2, 253
-  br i1 %cmp1, label %loop, label %exit
-
-exit:
-  ret void
-}
-
-; CHECK-COMMON-LABEL: ret_i8
-; CHECK-COMMON-NOT: uxt
-define i8 @ret_i8() {
-entry:
-  br label %loop
-
-loop:
-  %val = phi i8 [ 0, %entry ], [ %inc2, %if.end ]
-  %cmp = icmp ult i8 %val, 128
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %inc = add nuw i8 %val, 2
-  br label %if.end
-
-if.else:
-  %inc1 = add nuw i8 %val, 1
-  br label %if.end
-
-if.end:
-  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
-  %cmp1 = icmp ult i8 %inc2, 253
-  br i1 %cmp1, label %exit, label %loop
-
-exit:
-  ret i8 %inc2
-}
-
-; CHECK-COMMON-LABEL: phi_multiple_undefs
-; CHECK-COMMON-NOT: uxt
-define i16 @phi_multiple_undefs(i16 zeroext %arg) {
-entry:
-  br label %loop
-
-loop:
-  %val = phi i16 [ undef, %entry ], [ %inc2, %if.end ]
-  %cmp = icmp ult i16 %val, 128
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %inc = add nuw i16 %val, 2
-  br label %if.end
-
-if.else:
-  %inc1 = add nuw i16 %val, 1
-  br label %if.end
-
-if.end:
-  %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ]
-  %unrelated = phi i16 [ undef, %if.then ], [ %arg, %if.else ]
-  %cmp1 = icmp ult i16 %inc2, 253
-  br i1 %cmp1, label %loop, label %exit
-
-exit:
-  ret i16 %unrelated
-}
-
-; CHECK-COMMON-LABEL: promote_arg_return
-; CHECK-COMMON-NOT: uxt
-; CHECK-COMMON: strb
-define i16 @promote_arg_return(i16 zeroext %arg1, i16 zeroext %arg2, i8* %res) {
-  %add = add nuw i16 %arg1, 15
-  %mul = mul nuw nsw i16 %add, 3
-  %cmp = icmp ult i16 %mul, %arg2
-  %conv = zext i1 %cmp to i8
-  store i8 %conv, i8* %res
-  ret i16 %arg1
-}
-
-; CHECK-COMMON-LABEL: signext_bitcast_phi_select
-; CHECK: uxth [[UXT:r[0-9]+]], r0
-; CHECK: sxth [[SXT:r[0-9]+]], [[UXT]]
-; CHECK: cmp [[SXT]],
-; CHECK-NOT: xth
-define i16 @signext_bitcast_phi_select(i16 signext %start, i16* %in) {
-entry:
-  %const = bitcast i16 -1 to i16
-  br label %for.body
-
-for.body:
-  %idx = phi i16 [ %select, %if.else ], [ %start, %entry ]
-  %cmp.i = icmp sgt i16 %idx, %const
-  br i1 %cmp.i, label %exit, label %if.then
-
-if.then:
-  %idx.next = getelementptr i16, i16* %in, i16 %idx
-  %ld = load i16, i16* %idx.next, align 2
-  %cmp1.i = icmp eq i16 %ld, %idx
-  br i1 %cmp1.i, label %exit, label %if.else
-
-if.else:
-  %lobit = lshr i16 %idx, 15
-  %lobit.not = xor i16 %lobit, 1
-  %select = add nuw i16 %lobit.not, %idx
-  br label %for.body
-
-exit:
-  %res = phi i16 [ %ld, %if.then ], [ 0, %for.body ]
-  ret i16 %res
-}
diff --git a/llvm/test/CodeGen/ARM/CGP/arm-cgp-pointers.ll b/llvm/test/CodeGen/ARM/CGP/arm-cgp-pointers.ll
deleted file mode 100644
index e7f800232d45d..0000000000000
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-pointers.ll
+++ /dev/null
@@ -1,135 +0,0 @@
-; RUN: llc -mtriple=thumbv8 -arm-disable-cgp=false %s -o - | FileCheck %s
-; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s
-
-; CHECK-LABEL: phi_pointers
-; CHECK-NOT: uxt
-define void @phi_pointers(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) {
-entry:
-  %add = add nuw i8 %M, 1
-  %and = and i8 %add, 1
-  %cmp = icmp ugt i8 %add, %N
-  %base = select i1 %cmp, i16* %a, i16* %b
-  %other = select i1 %cmp, i16* %b, i16* %b
-  br label %loop
-
-loop:
-  %ptr = phi i16* [ %base, %entry ], [ %gep, %loop ]
-  %idx = phi i8 [ %and, %entry ], [ %inc, %loop ]
-  %load = load i16, i16* %ptr, align 2
-  %inc = add nuw nsw i8 %idx, 1
-  %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc
-  %cond = icmp eq i16* %gep, %other
-  br i1 %cond, label %exit, label %loop
-
-exit:
-  ret void
-}
-
-; CHECK-LABEL: phi_pointers_null
-; CHECK-NOT: uxt
-define void @phi_pointers_null(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) {
-entry:
-  %add = add nuw i8 %M, 1
-  %and = and i8 %add, 1
-  %cmp = icmp ugt i8 %add, %N
-  %base = select i1 %cmp, i16* %a, i16* %b
-  %other = select i1 %cmp, i16* %b, i16* %b
-  %cmp.1 = icmp eq i16* %base, %other
-  br i1 %cmp.1, label %fail, label %loop
-
-fail:
-  br label %loop
-
-loop:
-  %ptr = phi i16* [ %base, %entry ], [ null, %fail ], [ %gep, %if.then ]
-  %idx = phi i8 [ %and, %entry ], [ 0, %fail ], [ %inc, %if.then ]
-  %undef = icmp eq i16* %ptr, undef
-  br i1 %undef, label %exit, label %if.then
-
-if.then:
-  %load = load i16, i16* %ptr, align 2
-  %inc = add nuw nsw i8 %idx, 1
-  %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc
-  %cond = icmp eq i16* %gep, %other
-  br i1 %cond, label %exit, label %loop
-
-exit:
-  ret void
-}
-
-declare i8 @do_something_with_ptr(i8, i16*)
-
-; CHECK-LABEL: call_pointer
-; CHECK-NOT: uxt
-define i8 @call_pointer(i8 zeroext %x, i8 zeroext %y, i16* %a, i16* %b) {
-  %or = or i8 %x, %y
-  %shr = lshr i8 %or, 1
-  %add = add nuw i8 %shr, 2
-  %cmp = icmp ne i8 %add, 0
-  %ptr = select i1 %cmp, i16* %a, i16* %b
-  %call = tail call zeroext i8 @do_something_with_ptr(i8 %shr, i16* %ptr)
-  ret i8 %call
-}
-
-; CHECK-LABEL: pointer_to_pointer
-; CHECK-NOT: uxt
-define i16 @pointer_to_pointer(i16** %arg, i16 zeroext %limit) {
-entry:
-  %addr = load i16*, i16** %arg
-  %val = load i16, i16* %addr
-  %add = add nuw i16
%val, 7 - %cmp = icmp ult i16 %add, 256 - %res = select i1 %cmp, i16 128, i16 255 - ret i16 %res -} - -; CHECK-LABEL: gep_2d_array -; CHECK-NOT: uxt -define i8 @gep_2d_array(i8** %a, i8 zeroext %arg) { -entry: - %arrayidx.us = getelementptr inbounds i8*, i8** %a, i32 0 - %0 = load i8*, i8** %arrayidx.us, align 4 - %1 = load i8, i8* %0, align 1 - %sub = sub nuw i8 %1, 1 - %cmp = icmp ult i8 %sub, %arg - %res = select i1 %cmp, i8 27, i8 54 - ret i8 %res -} - -; CHECK-LABEL: gep_2d_array_loop -; CHECK-NOT: uxt -define void @gep_2d_array_loop(i16** nocapture readonly %a, i16** nocapture readonly %b, i32 %N) { -entry: - %cmp30 = icmp eq i32 %N, 0 - br i1 %cmp30, label %for.cond.cleanup, label %for.cond1.preheader.us - -for.cond1.preheader.us: - %y.031.us = phi i32 [ %inc13.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %entry ] - br label %for.body4.us - -for.body4.us: - %x.029.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ] - %arrayidx.us = getelementptr inbounds i16*, i16** %a, i32 %x.029.us - %0 = load i16*, i16** %arrayidx.us, align 4 - %arrayidx5.us = getelementptr inbounds i16, i16* %0, i32 %y.031.us - %1 = load i16, i16* %arrayidx5.us, align 2 - %dec.us = add nuw i16 %1, -1 - %cmp6.us = icmp ult i16 %dec.us, 16383 - %shl.us = shl nuw i16 %dec.us, 2 - %spec.select.us = select i1 %cmp6.us, i16 %shl.us, i16 %dec.us - %arrayidx10.us = getelementptr inbounds i16*, i16** %b, i32 %x.029.us - %2 = load i16*, i16** %arrayidx10.us, align 4 - %arrayidx11.us = getelementptr inbounds i16, i16* %2, i32 %y.031.us - store i16 %spec.select.us, i16* %arrayidx11.us, align 2 - %inc.us = add nuw i32 %x.029.us, 1 - %exitcond = icmp eq i32 %inc.us, %N - br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us - -for.cond1.for.cond.cleanup3_crit_edge.us: - %inc13.us = add nuw i32 %y.031.us, 1 - %exitcond32 = icmp eq i32 %inc13.us, %N - br i1 %exitcond32, label %for.cond.cleanup, label %for.cond1.preheader.us - -for.cond.cleanup: - ret void -} diff --git a/llvm/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll b/llvm/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll deleted file mode 100644 index 15030bd38660d..0000000000000 --- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll +++ /dev/null @@ -1,108 +0,0 @@ -; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -arm-disable-cgp=false -mattr=-use-misched %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP -; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP -; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM - -; CHECK-COMMON-LABEL: eq_sgt -; CHECK-NODSP: add -; CHECK-NODSP: uxtb -; CHECK-NODSP: sxtb -; CHECK-NODSP: cmp -; CHECK-NODSP: sub -; CHECK-NODSP: sxtb -; CHECK-NODSP: cmp - -; CHECK-DSP: uadd8 -; CHECK-DSP: sub -; CHECK-DSP: cmp -; CHECK-DSP: sxtb -; CHECK-DSP: sxtb -; CHECK-DSP: cmp - -; CHECK-DSP-IMM: uadd8 [[ADD:r[0-9]+]], -; CHECK-DSP-IMM: cmp [[ADD]], -; CHECK-DSP-IMM: subs [[SUB:r[0-9]+]], -; CHECK-DSP-IMM: sxtb [[SEXT0:r[0-9]+]], [[ADD]] -; CHECK-DSP-IMM: sxtb [[SEXT1:r[0-9]+]], [[SUB]] -; CHECK-DSP-IMM: cmp [[SEXT1]], [[SEXT0]] -define i8 @eq_sgt(i8* %x, i8 *%y, i8 zeroext %z) { -entry: - %load0 = load i8, i8* %x, align 1 - %load1 = load i8, i8* %y, align 1 - %add = add i8 %load0, %z - %sub = sub i8 %load1, 1 - %cmp = icmp eq i8 
%add, 200
-  %cmp1 = icmp sgt i8 %sub, %add
-  %res0 = select i1 %cmp, i8 35, i8 47
-  %res1 = select i1 %cmp1, i8 %res0, i8 %sub
-  ret i8 %res1
-}
-
-; CHECK-COMMON-LABEL: ugt_slt
-; CHECK-NODSP: sub
-; CHECK-NODSP: sxth
-; CHECK-NODSP: uxth
-; CHECK-NODSP: add
-; CHECK-NODSP: sxth
-; CHECK-NODSP: cmp
-; CHECK-NODSP: cmp
-
-; CHECK-DSP: sub
-; CHECK-DSP: sxth
-; CHECK-DSP: add
-; CHECK-DSP: uxth
-; CHECK-DSP: sxth
-; CHECK-DSP: cmp
-; CHECK-DSP: cmp
-
-; CHECK-DSP-IMM: uadd16 [[ADD:r[0-9]+]],
-; CHECK-DSP-IMM: sxth.w [[SEXT:r[0-9]+]], [[ADD]]
-; CHECK-DSP-IMM: sxth [[ARG:r[0-9]+]], r2
-; CHECK-DSP-IMM: cmp [[SEXT]], [[ARG]]
-; CHECK-DSP-IMM-NOT: uxt
-; CHECK-DSP-IMM: movs [[ONE:r[0-9]+]], #1
-; CHECK-DSP-IMM: usub16 [[SUB:r[0-9]+]], r1, [[ONE]]
-; CHECK-DSP-IMM: cmp [[SUB]], r2
-define i16 @ugt_slt(i16 *%x, i16 zeroext %y, i16 zeroext %z) {
-entry:
-  %load0 = load i16, i16* %x, align 1
-  %add = add i16 %load0, %z
-  %sub = sub i16 %y, 1
-  %cmp = icmp slt i16 %add, %z
-  %cmp1 = icmp ugt i16 %sub, %z
-  %res0 = select i1 %cmp, i16 35, i16 -1
-  %res1 = select i1 %cmp1, i16 %res0, i16 0
-  ret i16 %res1
-}
-
-; CHECK-COMMON-LABEL: urem_trunc_icmps
-; CHECK-COMMON-NOT: uxt
-; CHECK-COMMON: sxtb [[SEXT:r[0-9]+]],
-; CHECK-COMMON: cmp [[SEXT]], #7
-define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) {
-entry:
-  %ptr = load i16*, i16** %in, align 4
-  %ld = load i16, i16* %ptr, align 2
-  %cmp.i = icmp eq i16 %ld, 0
-  br i1 %cmp.i, label %exit, label %cond.false.i
-
-cond.false.i:
-  %rem = urem i16 5, %ld
-  %extract.t = trunc i16 %rem to i8
-  br label %body
-
-body:
-  %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ]
-  %cmp = icmp sgt i8 %cond.in.i.off0, 7
-  %conv5 = zext i1 %cmp to i32
-  store i32 %conv5, i32* %g, align 4
-  %.pr = load i32, i32* %k, align 4
-  %tobool13150 = icmp eq i32 %.pr, 0
-  br i1 %tobool13150, label %for.inc, label %exit
-
-for.inc:
-  %add = add nuw i8 %cond.in.i.off0, 1
-  br label %body
-
-exit:
-  ret void
-}
diff --git a/llvm/test/CodeGen/ARM/CGP/arm-cgp-signed.ll b/llvm/test/CodeGen/ARM/CGP/arm-cgp-signed.ll
deleted file mode 100644
index 596893724d203..0000000000000
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-signed.ll
+++ /dev/null
@@ -1,89 +0,0 @@
-; RUN: llc -mtriple=thumbv7em -arm-disable-cgp=false %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv8m.main -mattr=+dsp -arm-disable-cgp=false %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv7 %s -arm-disable-cgp=false -o - | FileCheck %s
-; RUN: llc -mtriple=armv8 %s -arm-disable-cgp=false -o - | FileCheck %s
-
-; Test to check that ARMCodeGenPrepare doesn't optimise away sign extends.
-; CHECK-LABEL: test_signed_load:
-; CHECK: uxth
-define i16 @test_signed_load(i16* %ptr) {
-  %load = load i16, i16* %ptr
-  %conv0 = zext i16 %load to i32
-  %conv1 = sext i16 %load to i32
-  %cmp = icmp eq i32 %conv0, %conv1
-  %conv2 = zext i1 %cmp to i16
-  ret i16 %conv2
-}
-
-; Don't allow sign bit generating opcodes. 
-; CHECK-LABEL: test_ashr: -; CHECK: sxth -define i16 @test_ashr(i16 zeroext %arg) { - %ashr = ashr i16 %arg, 1 - %cmp = icmp eq i16 %ashr, 0 - %conv = zext i1 %cmp to i16 - ret i16 %conv -} - -; CHECK-LABEL: test_sdiv: -; CHECK: sxth -define i16 @test_sdiv(i16 zeroext %arg) { - %sdiv = sdiv i16 %arg, 2 - %cmp = icmp ne i16 %sdiv, 0 - %conv = zext i1 %cmp to i16 - ret i16 %conv -} - -; CHECK-LABEL: test_srem -; CHECK: sxth -define i16 @test_srem(i16 zeroext %arg) { - %srem = srem i16 %arg, 4 - %cmp = icmp ne i16 %srem, 0 - %conv = zext i1 %cmp to i16 - ret i16 %conv -} - -; CHECK-LABEL: test_signext_b -; CHECK: ldrb [[LDR:r[0-9]+]], [r0] -; CHECK: uxtab [[UXT:r[0-9]+]], [[LDR]], r1 -; CHECK: cm{{.*}} [[UXT]], #128 -define i32 @test_signext_b(i8* %ptr, i8 signext %arg) { -entry: - %0 = load i8, i8* %ptr, align 1 - %1 = add nuw nsw i8 %0, %arg - %cmp = icmp ult i8 %1, 128 - %res = select i1 %cmp, i32 42, i32 20894 - ret i32 %res -} - -; CHECK-LABEL: test_signext_b_ult_slt -; CHECK: ldrb [[LDR:r[0-9]+]], [r0] -; CHECK: uxtab [[ADD:r[0-9]+]], [[LDR]], r1 -; CHECK: uxtb [[UXT:r[0-9]+]], r1 -; CHECK: cmp [[ADD]], [[UXT]] -; CHECK: uxtb [[TRUNC:r[0-9]+]], [[ADD]] -; CHECK: cmp [[TRUNC]], #127 -define i32 @test_signext_b_ult_slt(i8* %ptr, i8 signext %arg) { -entry: - %0 = load i8, i8* %ptr, align 1 - %1 = add nuw nsw i8 %0, %arg - %cmp = icmp sle i8 %1, 126 - %cmp.1 = icmp ule i8 %1, %arg - %or = and i1 %cmp, %cmp.1 - %res = select i1 %or, i32 42, i32 57 - ret i32 %res -} - -; CHECK-LABEL: test_signext_h -; CHECK: ldrh [[LDR:r[0-9]+]], [r0] -; CHECK: uxtah [[ADD:r[0-9]+]], [[LDR]], r1 -; CHECK: cm{{.*}} [[ADD]], -define i32 @test_signext_h(i16* %ptr, i16 signext %arg) { -entry: - %0 = load i16, i16* %ptr, align 1 - %1 = add nuw nsw i16 %0, %arg - %cmp = icmp ult i16 %1, 32768 - %res = select i1 %cmp, i32 42, i32 20894 - ret i32 %res -} - diff --git a/llvm/test/CodeGen/ARM/CGP/arm-cgp-switch.ll b/llvm/test/CodeGen/ARM/CGP/arm-cgp-switch.ll deleted file mode 100644 index 29c35fbc96e00..0000000000000 --- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-switch.ll +++ /dev/null @@ -1,168 +0,0 @@ -; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -o - | FileCheck %s -; RUN: llc -mtriple=thumbv7-linux-android %s -arm-disable-cgp=false -o - | FileCheck %s - -; CHECK-LABEL: truncate_source_phi_switch -; CHECK: ldrb -; CHECK: uxtb -define void @truncate_source_phi_switch(i8* %memblock, i8* %store, i16 %arg) { -entry: - %pre = load i8, i8* %memblock, align 1 - %conv = trunc i16 %arg to i8 - br label %header - -header: - %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ] - %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ] - %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ] - switch i8 %phi.0, label %default [ - i8 43, label %for.inc.i - i8 45, label %for.inc.i.i - ] - -for.inc.i: - %xor = xor i8 %phi.1, 1 - br label %latch - -for.inc.i.i: - %and = and i8 %phi.1, 3 - br label %latch - -default: - %sub = sub i8 %phi.0, 1 - %cmp2 = icmp ugt i8 %sub, 4 - br i1 %cmp2, label %latch, label %exit - -latch: - %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ] - %count = add nuw i8 %phi.2, 1 - store i8 %count, i8* %store, align 1 - br label %header - -exit: - ret void -} - -; CHECK-LABEL: icmp_switch_source: -; CHECK-NOT: uxt -define i16 @icmp_switch_source(i16 zeroext %arg) { -entry: - %conv = add nuw i16 %arg, 15 - %mul = mul nuw nsw i16 %conv, 3 - switch i16 %arg, label %default [ - i16 0, label %sw.bb - i16 1, label %sw.bb.i - ] - -sw.bb: - %cmp0 = icmp ult i16 %mul, 127 - %select = 
select i1 %cmp0, i16 %mul, i16 127 - br label %exit - -sw.bb.i: - %cmp1 = icmp ugt i16 %mul, 34 - %select.i = select i1 %cmp1, i16 %mul, i16 34 - br label %exit - -default: - br label %exit - -exit: - %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ] - ret i16 %res -} - -; CHECK-LABEL: icmp_switch_narrow_source: -; CHECK-NOT: uxt -define i16 @icmp_switch_narrow_source(i8 zeroext %arg) { -entry: - %conv = zext i8 %arg to i16 - %add = add nuw i16 %conv, 15 - %mul = mul nuw nsw i16 %add, 3 - switch i8 %arg, label %default [ - i8 0, label %sw.bb - i8 1, label %sw.bb.i - ] - -sw.bb: - %cmp0 = icmp ult i16 %mul, 127 - %select = select i1 %cmp0, i16 %mul, i16 127 - br label %exit - -sw.bb.i: - %cmp1 = icmp ugt i16 %mul, 34 - %select.i = select i1 %cmp1, i16 %mul, i16 34 - br label %exit - -default: - br label %exit - -exit: - %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ] - ret i16 %res -} - -; CHECK-LABEL: icmp_switch_trunc: -; CHECK-NOT: uxt -define i16 @icmp_switch_trunc(i16 zeroext %arg) { -entry: - %conv = add nuw i16 %arg, 15 - %mul = mul nuw nsw i16 %conv, 3 - %trunc = trunc i16 %arg to i3 - switch i3 %trunc, label %default [ - i3 0, label %sw.bb - i3 1, label %sw.bb.i - ] - -sw.bb: - %cmp0 = icmp ult i16 %mul, 127 - %select = select i1 %cmp0, i16 %mul, i16 127 - br label %exit - -sw.bb.i: - %cmp1 = icmp ugt i16 %mul, 34 - %select.i = select i1 %cmp1, i16 %mul, i16 34 - br label %exit - -default: - br label %exit - -exit: - %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ] - ret i16 %res -} - -%class.ae = type { i8 } -%class.x = type { i8 } -%class.v = type { %class.q } -%class.q = type { i16 } -declare %class.x* @_ZNK2ae2afEv(%class.ae*) local_unnamed_addr -declare %class.v* @_ZN1x2acEv(%class.x*) local_unnamed_addr - -; CHECK-LABEL: trunc_i16_i9_switch -; CHECK-NOT: uxt -define i32 @trunc_i16_i9_switch(%class.ae* %this) { -entry: - %call = tail call %class.x* @_ZNK2ae2afEv(%class.ae* %this) - %call2 = tail call %class.v* @_ZN1x2acEv(%class.x* %call) - %0 = getelementptr inbounds %class.v, %class.v* %call2, i32 0, i32 0, i32 0 - %1 = load i16, i16* %0, align 2 - %2 = trunc i16 %1 to i9 - %trunc = and i9 %2, -64 - switch i9 %trunc, label %cleanup.fold.split [ - i9 0, label %cleanup - i9 -256, label %if.then7 - ] - -if.then7: - %3 = and i16 %1, 7 - %tobool = icmp eq i16 %3, 0 - %cond = select i1 %tobool, i32 2, i32 1 - br label %cleanup - -cleanup.fold.split: - br label %cleanup - -cleanup: - %retval.0 = phi i32 [ %cond, %if.then7 ], [ 0, %entry ], [ 2, %cleanup.fold.split ] - ret i32 %retval.0 -} diff --git a/llvm/test/CodeGen/ARM/CGP/clear-structures.ll b/llvm/test/CodeGen/ARM/CGP/clear-structures.ll deleted file mode 100644 index 86459c35dd60d..0000000000000 --- a/llvm/test/CodeGen/ARM/CGP/clear-structures.ll +++ /dev/null @@ -1,75 +0,0 @@ -; RUN: opt -arm-codegenprepare -arm-disable-cgp=false -mtriple=armv8 -verify %s -S -o - | FileCheck %s - -; CHECK: clear_structures -define i32 @clear_structures(i8* nocapture readonly %fmt, [1 x i32] %ap.coerce, i8* %out, void (i32, i8*)* nocapture %write) { -entry: - br label %while.cond.outer - -while.cond.outer: - %fmt.addr.0.ph = phi i8* [ %fmt, %entry ], [ %fmt.addr.3, %while.cond.outer.backedge ] - %0 = load i8, i8* %fmt.addr.0.ph, align 1 - br label %while.cond - -while.cond: - switch i8 %0, label %while.cond [ - i8 0, label %while.end48 - i8 37, label %while.cond2 - ] - -while.cond2: - %flags.0 = phi i32 [ %or, %while.cond2 ], [ 0, %while.cond ] - 
%fmt.addr.0.pn = phi i8* [ %fmt.addr.1, %while.cond2 ], [ %fmt.addr.0.ph, %while.cond ] - %fmt.addr.1 = getelementptr inbounds i8, i8* %fmt.addr.0.pn, i32 1 - %1 = load i8, i8* %fmt.addr.1, align 1 - ; CHECK: add i8 [[LOAD:%[^ ]+]], -32 - %sub = add i8 %1, -32 - %conv6 = zext i8 %sub to i32 - %shl = shl i32 1, %conv6 - %and = and i32 %shl, 75785 - %tobool7 = icmp eq i32 %and, 0 - %or = or i32 %shl, %flags.0 - br i1 %tobool7, label %while.cond10.preheader, label %while.cond2 - -while.cond10.preheader: - ; CHECK: [[ADD:%[^ ]+]] = add i8 [[LOAD]], -48 - ; CHECK: icmp ult i8 [[ADD]], 10 - %.off = add i8 %1, -48 - %2 = icmp ult i8 %.off, 10 - br i1 %2, label %while.cond10, label %while.end18.split - -while.cond10: - br label %while.cond10 - -while.end18.split: - %cmp20 = icmp eq i8 %1, 46 - br i1 %cmp20, label %if.then22, label %cond.end - -if.then22: - %incdec.ptr23 = getelementptr inbounds i8, i8* %fmt.addr.0.pn, i32 2 - %.pr74 = load i8, i8* %incdec.ptr23, align 1 - ; CHECK: [[LOAD2:[^ ]+]] = load i8, i8* - ; CHECK: [[ZEXT:[^ ]+]] = zext i8 [[LOAD2]] to i32 - ; CHECK: sub i32 [[ZEXT]], 48 - %.pr74.off = add i8 %.pr74, -48 - %3 = icmp ult i8 %.pr74.off, 10 - br i1 %3, label %while.cond24, label %cond.end - -while.cond24: - br label %while.cond24 - -cond.end: - %fmt.addr.3 = phi i8* [ %fmt.addr.1, %while.end18.split ], [ %incdec.ptr23, %if.then22 ] - %and39 = and i32 %flags.0, 2048 - %tobool40 = icmp eq i32 %and39, 0 - br i1 %tobool40, label %while.cond.outer.backedge, label %if.then43 - -while.cond.outer.backedge: - br label %while.cond.outer - -if.then43: - tail call void %write(i32 43, i8* %out) #1 - br label %while.cond.outer.backedge - -while.end48: - ret i32 undef -} diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir index de5545594bf39..9d66209211058 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir @@ -44,7 +44,7 @@ body: | %3(s1) = G_CONSTANT i1 1 G_STORE %3(s1), %4(p0) :: (store 1) ; CHECK-NOT: G_CONSTANT i1 - ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: {{%[0-9]+}}:_(s1) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i1 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir index b413130558e63..0cdab2c41f798 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir @@ -1131,10 +1131,9 @@ body: | ; SOFT-NOT: G_FCMP ; For soft float we just need to return a '-1' constant, but the truncation ; to 1 bit is converted by the combiner to the following masking sequence. - ; SOFT: [[R:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SOFT: [[MASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SOFT: [[R:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SOFT: [[RCOPY:%[0-9]+]]:_(s32) = COPY [[R]](s32) - ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_AND [[RCOPY]], [[MASK]] + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_AND [[RCOPY]], [[R]] ; SOFT-NOT: G_FCMP ; CHECK: $r0 = COPY [[REXT]] ... @@ -1853,11 +1852,10 @@ body: | ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(true), [[X]](s64), [[Y]] ; HARD: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) ; SOFT-NOT: G_FCMP - ; SOFT: [[R:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; The result needs to be truncated, and the combiner turns the truncation ; into the following masking sequence. 
; SOFT: [[MASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SOFT: [[RCOPY:%[0-9]+]]:_(s32) = COPY [[R]] + ; SOFT: [[RCOPY:%[0-9]+]]:_(s32) = COPY [[MASK]] ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_AND [[RCOPY]], [[MASK]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index dd741388d7499..3fd35bd1e9d0a 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -40,7 +40,7 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Transform functions to use DSP intrinsics ; CHECK-NEXT: Interleaved Access Pass -; CHECK-NEXT: ARM IR optimizations +; CHECK-NEXT: Type Promotion ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: CodeGen Prepare @@ -154,6 +154,7 @@ ; CHECK-NEXT: ARM constant island placement and branch shortening pass ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: ReachingDefAnalysis ; CHECK-NEXT: ARM Low Overhead Loops pass ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis diff --git a/llvm/test/CodeGen/ARM/addsubo-legalization.ll b/llvm/test/CodeGen/ARM/addsubo-legalization.ll index e9143d814d3de..e3a48ed0c14f1 100644 --- a/llvm/test/CodeGen/ARM/addsubo-legalization.ll +++ b/llvm/test/CodeGen/ARM/addsubo-legalization.ll @@ -95,48 +95,19 @@ define <2 x i1> @usubo(<2 x i64> *%ptr, <2 x i64> *%ptr2) { define <2 x i1> @saddo(<2 x i64> *%ptr, <2 x i64> *%ptr2) { ; CHECK-LABEL: saddo: ; CHECK: @ %bb.0: -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: vld1.64 {d20, d21}, [r0] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vld1.64 {d18, d19}, [r1] -; CHECK-NEXT: vadd.i64 q8, q10, q9 -; CHECK-NEXT: vmov.32 r2, d20[0] -; CHECK-NEXT: vmov.32 r1, d20[1] -; CHECK-NEXT: vmov.32 r12, d16[0] -; CHECK-NEXT: vmov.32 r8, d16[1] -; CHECK-NEXT: vmov.32 lr, d17[0] -; CHECK-NEXT: vmov.32 r4, d21[0] -; CHECK-NEXT: vmov.32 r5, d17[1] -; CHECK-NEXT: vmov.32 r6, d18[1] -; CHECK-NEXT: vmov.32 r7, d21[1] -; CHECK-NEXT: subs.w r2, r12, r2 -; CHECK-NEXT: vmov.32 r2, d19[1] -; CHECK-NEXT: sbcs.w r1, r8, r1 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #1 -; CHECK-NEXT: subs.w r4, lr, r4 -; CHECK-NEXT: sbcs.w r7, r5, r7 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r3, #-1 -; CHECK-NEXT: asrs r7, r6, #31 -; CHECK-NEXT: vdup.32 d21, r3 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r1, #-1 -; CHECK-NEXT: vdup.32 d20, r1 +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vqadd.s64 q10, q9, q8 +; CHECK-NEXT: vadd.i64 q8, q9, q8 +; CHECK-NEXT: vceq.i32 q9, q8, q10 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0] -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vdup.32 d19, r2 -; CHECK-NEXT: vdup.32 d18, r7 -; CHECK-NEXT: veor q9, q9, q10 +; CHECK-NEXT: vrev64.32 q10, q9 +; CHECK-NEXT: vand q9, q9, q10 +; CHECK-NEXT: vmvn q9, q9 ; CHECK-NEXT: vmovn.i64 d18, q9 ; CHECK-NEXT: vmov r2, r1, d18 ; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: bx lr %x = load <2 x i64>, <2 x i64>* %ptr, align 8 %y = load <2 x i64>, <2 x i64>* %ptr2, align 8 %s = call {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64> %x, <2 x i64> %y) @@ -149,64 +120,19 @@ define <2 x i1> @saddo(<2 x i64> *%ptr, <2 x i64> *%ptr2) { define <2 x i1> @ssubo(<2 x i64> *%ptr, <2 x i64> *%ptr2) { 
; CHECK-LABEL: ssubo: ; CHECK: @ %bb.0: -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: vld1.64 {d18, d19}, [r1] -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: vld1.64 {d20, d21}, [r0] -; CHECK-NEXT: vsub.i64 q8, q10, q9 -; CHECK-NEXT: vmov.32 r1, d20[0] -; CHECK-NEXT: vmov.32 r12, d20[1] -; CHECK-NEXT: vmov.32 r3, d16[0] -; CHECK-NEXT: vmov.32 lr, d16[1] -; CHECK-NEXT: vmov.32 r4, d21[0] -; CHECK-NEXT: vmov.32 r5, d17[0] -; CHECK-NEXT: vmov.32 r6, d21[1] -; CHECK-NEXT: vmov.32 r7, d17[1] -; CHECK-NEXT: vmov.32 r8, d18[1] -; CHECK-NEXT: subs r1, r3, r1 -; CHECK-NEXT: vmov.32 r3, d18[0] -; CHECK-NEXT: sbcs.w r1, lr, r12 -; CHECK-NEXT: vmov.32 r12, d19[0] -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #1 -; CHECK-NEXT: subs r5, r5, r4 -; CHECK-NEXT: vmov.32 r5, d19[1] -; CHECK-NEXT: sbcs r7, r6 -; CHECK-NEXT: mov.w r7, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 -; CHECK-NEXT: vdup.32 d21, r7 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: sbcs.w r3, r2, r8 -; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r3, #1 -; CHECK-NEXT: rsbs.w r6, r12, #0 -; CHECK-NEXT: sbcs.w r6, r2, r5 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r2, #-1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vdup.32 d19, r2 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r3, #-1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r1, #-1 -; CHECK-NEXT: vdup.32 d18, r3 -; CHECK-NEXT: vdup.32 d20, r1 -; CHECK-NEXT: veor q9, q9, q10 +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vqsub.s64 q10, q9, q8 +; CHECK-NEXT: vsub.i64 q8, q9, q8 +; CHECK-NEXT: vceq.i32 q9, q8, q10 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0] +; CHECK-NEXT: vrev64.32 q10, q9 +; CHECK-NEXT: vand q9, q9, q10 +; CHECK-NEXT: vmvn q9, q9 ; CHECK-NEXT: vmovn.i64 d18, q9 ; CHECK-NEXT: vmov r2, r1, d18 ; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: bx lr %x = load <2 x i64>, <2 x i64>* %ptr, align 8 %y = load <2 x i64>, <2 x i64>* %ptr2, align 8 %s = call {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x i64> %x, <2 x i64> %y) diff --git a/llvm/test/CodeGen/ARM/cmov_fp16.ll b/llvm/test/CodeGen/ARM/cmov_fp16.ll new file mode 100644 index 0000000000000..925fed5828112 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmov_fp16.ll @@ -0,0 +1,261 @@ +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-THUMB,CHECK +; RUN: llc -mtriple=armv8.2a-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-ARM,CHECK + +define i32 @test_ne(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_ne: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vseleq.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp ne i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_eq(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_eq: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: 
vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vseleq.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp eq i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_gt(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_gt: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselgt.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp sgt i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_ge(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_ge: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselge.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp sge i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_lt(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_lt: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselge.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp slt i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_le(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_le: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselgt.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp sle i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_hi(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_hi: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it hi +; CHECK-THUMB-NEXT: vmovhi.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_hi: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovhi.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = 
icmp ugt i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_hs(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_hs: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it hs +; CHECK-THUMB-NEXT: vmovhs.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_hs: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovhs.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp uge i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_lo(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_lo: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it lo +; CHECK-THUMB-NEXT: vmovlo.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_lo: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovlo.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp ult i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_ls(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_ls: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it ls +; CHECK-THUMB-NEXT: vmovls.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_ls: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovls.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp ule i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics.ll b/llvm/test/CodeGen/ARM/fp-intrinsics.ll new file mode 100644 index 0000000000000..8d4a6376a9771 --- /dev/null +++ b/llvm/test/CodeGen/ARM/fp-intrinsics.ll @@ -0,0 +1,557 @@ +; RUN: llc -mtriple=armv8a-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SP,CHECK-DP +; RUN: llc -mtriple=thumbv8m.main-none-eabi %s -o - | FileCheck %s 
--check-prefixes=CHECK,CHECK-NOSP,CHECK-NODP
+; RUN: llc -mtriple=thumbv8m.main-none-eabi %s -o - -mattr=fp-armv8 | FileCheck %s --check-prefixes=CHECK,CHECK-SP,CHECK-DP
+; RUN: llc -mtriple=thumbv8m.main-none-eabi %s -o - -mattr=fp-armv8sp | FileCheck %s --check-prefixes=CHECK,CHECK-SP,CHECK-NODP
+
+; Check that constrained fp intrinsics are correctly lowered. In particular,
+; check that the valid combinations of single-precision and double-precision
+; hardware being present or absent work as expected (i.e. we get an instruction
+; when one is available, otherwise a libcall).
+
+; FIXME: Tests fail as various things in CodeGen and Target/ARM need fixing.
+; XFAIL: *
+
+
+; Single-precision intrinsics
+
+; CHECK-LABEL: add_f32:
+; CHECK-NOSP: bl __aeabi_fadd
+; CHECK-SP: vadd.f32
+define float @add_f32(float %x, float %y) #0 {
+  %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+; CHECK-LABEL: sub_f32:
+; CHECK-NOSP: bl __aeabi_fsub
+; CHECK-SP: vsub.f32
+define float @sub_f32(float %x, float %y) #0 {
+  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+; CHECK-LABEL: mul_f32:
+; CHECK-NOSP: bl __aeabi_fmul
+; CHECK-SP: vmul.f32
+define float @mul_f32(float %x, float %y) #0 {
+  %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+; CHECK-LABEL: div_f32:
+; CHECK-NOSP: bl __aeabi_fdiv
+; CHECK-SP: vdiv.f32
+define float @div_f32(float %x, float %y) #0 {
+  %val = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+; CHECK-LABEL: frem_f32:
+; CHECK: bl fmodf
+define float @frem_f32(float %x, float %y) #0 {
+  %val = call float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+; CHECK-LABEL: fma_f32:
+; CHECK-NOSP: bl fmaf
+; CHECK-SP: vfma.f32
+define float @fma_f32(float %x, float %y, float %z) #0 {
+  %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+; CHECK-LABEL: fptosi_f32:
+; CHECK-NOSP: bl __aeabi_f2iz
+; CHECK-SP: vcvt.s32.f32
+define i32 @fptosi_f32(float %x) #0 {
+  %val = call i32 @llvm.experimental.constrained.fptosi.f32(float %x, metadata !"fpexcept.strict") #0
+  ret i32 %val
+}
+
+; CHECK-LABEL: fptoui_f32:
+; CHECK-NOSP: bl __aeabi_f2uiz
+; CHECK-SP: vcvt.u32.f32
+define i32 @fptoui_f32(float %x) #0 {
+  %val = call i32 @llvm.experimental.constrained.fptoui.f32(float %x, metadata !"fpexcept.strict") #0
+  ret i32 %val
+}
+
+; CHECK-LABEL: sqrt_f32:
+; CHECK-NOSP: bl sqrtf
+; CHECK-SP: vsqrt.f32
+define float @sqrt_f32(float %x) #0 {
+  %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+; CHECK-LABEL: powi_f32:
+; CHECK: bl __powisf2
+define float @powi_f32(float %x, i32 %y) #0 {
+  %val = call float @llvm.experimental.constrained.powi.f32(float %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %val
+}
+
+; CHECK-LABEL: sin_f32:
+; CHECK: bl sinf
+define float @sin_f32(float %x) #0 {
+  %val = call float 
@llvm.experimental.constrained.sin.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: cos_f32: +; CHECK: bl cosf +define float @cos_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.cos.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: pow_f32: +; CHECK: bl powf +define float @pow_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.pow.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: log_f32: +; CHECK: bl logf +define float @log_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.log.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: log10_f32: +; CHECK: bl log10f +define float @log10_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.log10.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: log2_f32: +; CHECK: bl log2f +define float @log2_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.log2.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: exp_f32: +; CHECK: bl expf +define float @exp_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.exp.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: exp2_f32: +; CHECK: bl exp2f +define float @exp2_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.exp2.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: rint_f32: +; CHECK-NOSP: bl rintf +; CHECK-SP: vrintx.f32 +define float @rint_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.rint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: nearbyint_f32: +; CHECK-NOSP: bl nearbyintf +; CHECK-SP: vrintr.f32 +define float @nearbyint_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.nearbyint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: lrint_f32: +; CHECK: bl lrintf +define i32 @lrint_f32(float %x) #0 { + %val = call i32 @llvm.experimental.constrained.lrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: llrint_f32: +; CHECK: bl llrintf +define i32 @llrint_f32(float %x) #0 { + %val = call i32 @llvm.experimental.constrained.llrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: maxnum_f32: +; CHECK-NOSP: bl fmaxf +; CHECK-SP: vmaxnm.f32 +define float @maxnum_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.maxnum.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: minnum_f32: +; CHECK-NOSP: bl fminf +; CHECK-SP: vminnm.f32 +define float @minnum_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.minnum.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: ceil_f32: +; CHECK-NOSP: bl ceilf +; CHECK-SP: vrintp.f32 +define float @ceil_f32(float %x) #0 { + %val = call float 
@llvm.experimental.constrained.ceil.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: floor_f32: +; CHECK-NOSP: bl floorf +; CHECK-SP: vrintm.f32 +define float @floor_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.floor.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: lround_f32: +; CHECK: bl lroundf +define i32 @lround_f32(float %x) #0 { + %val = call i32 @llvm.experimental.constrained.lround.f32(float %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: llround_f32: +; CHECK: bl llroundf +define i32 @llround_f32(float %x) #0 { + %val = call i32 @llvm.experimental.constrained.llround.f32(float %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: round_f32: +; CHECK-NOSP: bl roundf +; CHECK-SP: vrinta.f32 +define float @round_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.round.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: trunc_f32: +; CHECK-NOSP: bl truncf +; CHECK-SP: vrintz.f32 +define float @trunc_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.trunc.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + + +; Double-precision intrinsics + +; CHECK-LABEL: add_f64: +; CHECK-NODP: bl __aeabi_dadd +; CHECK-DP: vadd.f64 +define double @add_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: sub_f64: +; CHECK-NODP: bl __aeabi_dsub +; CHECK-DP: vsub.f64 +define double @sub_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: mul_f64: +; CHECK-NODP: bl __aeabi_dmul +; CHECK-DP: vmul.f64 +define double @mul_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: div_f64: +; CHECK-NODP: bl __aeabi_ddiv +; CHECK-DP: vdiv.f64 +define double @div_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.fdiv.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: frem_f64: +; CHECK: bl fmod +define double @frem_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.frem.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: fma_f64: +; CHECK-NODP: bl fma +; CHECK-DP: vfma.f64 +define double @fma_f64(double %x, double %y, double %z) #0 { + %val = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: fptosi_f64: +; CHECK-NODP: bl __aeabi_d2iz +; CHECK-DP: vcvt.s32.f64 +define i32 @fptosi_f64(double %x) #0 { + %val = call i32 @llvm.experimental.constrained.fptosi.f64(double %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: fptoui_f64: +; CHECK-NODP: bl __aeabi_d2uiz +; CHECK-DP: vcvt.u32.f64 +define i32 @fptoui_f64(double %x) #0 { + %val = call i32 
@llvm.experimental.constrained.fptoui.f64(double %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: sqrt_f64: +; CHECK-NODP: bl sqrt +; CHECK-DP: vsqrt.f64 +define double @sqrt_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.sqrt.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: powi_f64: +; CHECK: bl __powidf2 +define double @powi_f64(double %x, i32 %y) #0 { + %val = call double @llvm.experimental.constrained.powi.f64(double %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: sin_f64: +; CHECK: bl sin +define double @sin_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.sin.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: cos_f64: +; CHECK: bl cos +define double @cos_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.cos.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: pow_f64: +; CHECK: bl pow +define double @pow_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.pow.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: log_f64: +; CHECK: bl log +define double @log_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.log.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: log10_f64: +; CHECK: bl log10 +define double @log10_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.log10.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: log2_f64: +; CHECK: bl log2 +define double @log2_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.log2.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: exp_f64: +; CHECK: bl exp +define double @exp_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.exp.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: exp2_f64: +; CHECK: bl exp2 +define double @exp2_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.exp2.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: rint_f64: +; CHECK-NODP: bl rint +; CHECK-DP: vrintx.f64 +define double @rint_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.rint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: nearbyint_f64: +; CHECK-NODP: bl nearbyint +; CHECK-DP: vrintr.f64 +define double @nearbyint_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.nearbyint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: lrint_f64: +; CHECK: bl lrint +define i32 @lrint_f64(double %x) #0 { + %val = call i32 @llvm.experimental.constrained.lrint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: llrint_f64: +; CHECK: bl llrint +define i32 @llrint_f64(double %x) #0 { + %val = call i32 @llvm.experimental.constrained.llrint.f64(double %x, metadata !"round.tonearest", 
metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: maxnum_f64: +; CHECK-NODP: bl fmax +; CHECK-DP: vmaxnm.f64 +define double @maxnum_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.maxnum.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: minnum_f64: +; CHECK-NODP: bl fmin +; CHECK-DP: vminnm.f64 +define double @minnum_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.minnum.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: ceil_f64: +; CHECK-NODP: bl ceil +; CHECK-DP: vrintp.f64 +define double @ceil_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.ceil.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: floor_f64: +; CHECK-NODP: bl floor +; CHECK-DP: vrintm.f64 +define double @floor_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.floor.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: lround_f64: +; CHECK: bl lround +define i32 @lround_f64(double %x) #0 { + %val = call i32 @llvm.experimental.constrained.lround.f64(double %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: llround_f64: +; CHECK: bl llround +define i32 @llround_f64(double %x) #0 { + %val = call i32 @llvm.experimental.constrained.llround.f64(double %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: round_f64: +; CHECK-NODP: bl round +; CHECK-DP: vrinta.f64 +define double @round_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.round.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: trunc_f64: +; CHECK-NODP: bl trunc +; CHECK-DP: vrintz.f64 +define double @trunc_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.trunc.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + + +; Single/Double conversion intrinsics + +; CHECK-LABEL: fptrunc_f32: +; CHECK-NODP: bl __aeabi_d2f +; CHECK-DP: vcvt.f32.f64 +define float @fptrunc_f32(double %x) #0 { + %val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: fpext_f32: +; CHECK-NODP: bl __aeabi_f2d +; CHECK-DP: vcvt.f64.f32 +define double @fpext_f32(float %x) #0 { + %val = call double @llvm.experimental.constrained.fpext.f64.f32(float %x, metadata !"fpexcept.strict") #0 + ret double %val +} + + +attributes #0 = { strictfp } + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) +declare i32 @llvm.experimental.constrained.fptosi.f32(float, metadata) +declare i32 @llvm.experimental.constrained.fptoui.f32(float, metadata) +declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) 
+declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata) +declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.log2.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.exp.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.exp2.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.f32(float, metadata, metadata) +declare i32 @llvm.experimental.constrained.llrint.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata) +declare i32 @llvm.experimental.constrained.lround.f32(float, metadata) +declare i32 @llvm.experimental.constrained.llround.f32(float, metadata) +declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata) + +declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) +declare i32 @llvm.experimental.constrained.fptosi.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptoui.f64(double, metadata) +declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) +declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.log10.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.f64(double, metadata, metadata) 
+declare i32 @llvm.experimental.constrained.llrint.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata) +declare i32 @llvm.experimental.constrained.lround.f64(double, metadata) +declare i32 @llvm.experimental.constrained.llround.f64(double, metadata) +declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata) + +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) diff --git a/llvm/test/CodeGen/ARM/neon-v8.1a.ll b/llvm/test/CodeGen/ARM/neon-v8.1a.ll index 91259139d4463..95d2085800810 100644 --- a/llvm/test/CodeGen/ARM/neon-v8.1a.ll +++ b/llvm/test/CodeGen/ARM/neon-v8.1a.ll @@ -8,20 +8,20 @@ declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) define <4 x i16> @test_vqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_vqrdmlah_v4i16: %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) - %retval = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc, <4 x i16> %prod) + %retval = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) ; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <4 x i16> %retval } @@ -29,7 +29,7 @@ define <4 x i16> @test_vqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> define <8 x i16> @test_vqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { ; CHECK-LABEL: test_vqrdmlah_v8i16: %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) - %retval = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod) + %retval = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) ; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <8 x i16> %retval } @@ -37,7 +37,7 @@ define <8 x i16> @test_vqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> define <2 x i32> 
@test_vqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { ; CHECK-LABEL: test_vqrdmlah_v2i32: %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) - %retval = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod) + %retval = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) ; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <2 x i32> %retval } @@ -45,7 +45,7 @@ define <2 x i32> @test_vqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> define <4 x i32> @test_vqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_vqrdmlah_v4i32: %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) - %retval = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod) + %retval = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) ; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <4 x i32> %retval } @@ -53,7 +53,7 @@ define <4 x i32> @test_vqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> define <4 x i16> @test_vqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_vqrdmlsh_v4i16: %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) - %retval = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod) + %retval = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) ; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <4 x i16> %retval } @@ -61,7 +61,7 @@ define <4 x i16> @test_vqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> define <8 x i16> @test_vqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { ; CHECK-LABEL: test_vqrdmlsh_v8i16: %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) - %retval = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod) + %retval = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) ; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <8 x i16> %retval } @@ -69,7 +69,7 @@ define <8 x i16> @test_vqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> define <2 x i32> @test_vqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { ; CHECK-LABEL: test_vqrdmlsh_v2i32: %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) - %retval = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod) + %retval = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) ; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <2 x i32> %retval } @@ -77,7 +77,7 @@ define <2 x i32> @test_vqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> define <4 x i32> @test_vqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_vqrdmlsh_v4i32: %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) - %retval = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod) + %retval = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) ; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <4 x i32> %retval } @@ -90,7 +90,7 @@ define <4 x i16> @test_vqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) - %retval = call <4 x i16> 
@llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc, <4 x i16> %prod) + %retval = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) ; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3] ret <4 x i16> %retval } @@ -100,7 +100,7 @@ define <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16 entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) - %retval = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod) + %retval = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) ; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2] ret <8 x i16> %retval } @@ -110,7 +110,7 @@ define <2 x i32> @test_vqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) - %retval = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod) + %retval = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) ; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1] ret <2 x i32> %retval } @@ -120,7 +120,7 @@ define <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) - %retval = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod) + %retval = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) ; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0] ret <4 x i32> %retval } @@ -130,7 +130,7 @@ define <4 x i16> @test_vqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) - %retval = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod) + %retval = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) ; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3] ret <4 x i16> %retval } @@ -140,7 +140,7 @@ define <8 x i16> @test_vqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16 entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) - %retval = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod) + %retval = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) ; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2] ret <8 x i16> %retval } @@ -150,7 +150,7 @@ define <2 x i32> @test_vqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) - %retval = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod) + %retval = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) ; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1] ret <2 x i32> %retval } @@ -160,7 +160,7 @@ define <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 
%prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) - %retval = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod) + %retval = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) ; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0] ret <4 x i32> %retval } diff --git a/llvm/test/CodeGen/ARM/neon-vcadd.ll b/llvm/test/CodeGen/ARM/neon-vcadd.ll new file mode 100644 index 0000000000000..93a85c8c73c6c --- /dev/null +++ b/llvm/test/CodeGen/ARM/neon-vcadd.ll @@ -0,0 +1,54 @@ +; RUN: llc %s -mtriple=arm -mattr=+armv8.3-a,+fullfp16 -o - | FileCheck %s + +define <4 x half> @foo16x4_rot(<4 x half> %a, <4 x half> %b) { +entry: +; CHECK-LABEL: foo16x4_rot +; CHECK-DAG: vcadd.f16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #90 +; CHECK-DAG: vcadd.f16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #270 + %vcadd_rot90_v2.i = tail call <4 x half> @llvm.arm.neon.vcadd.rot90.v4f16(<4 x half> %a, <4 x half> %b) + %vcadd_rot270_v2.i = tail call <4 x half> @llvm.arm.neon.vcadd.rot270.v4f16(<4 x half> %a, <4 x half> %b) + %add = fadd <4 x half> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i + ret <4 x half> %add +} + +define <2 x float> @foo32x2_rot(<2 x float> %a, <2 x float> %b) { +entry: +; CHECK-LABEL: foo32x2_rot +; CHECK-DAG: vcadd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #90 +; CHECK-DAG: vcadd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #270 + %vcadd_rot90_v2.i = tail call <2 x float> @llvm.arm.neon.vcadd.rot90.v2f32(<2 x float> %a, <2 x float> %b) + %vcadd_rot270_v2.i = tail call <2 x float> @llvm.arm.neon.vcadd.rot270.v2f32(<2 x float> %a, <2 x float> %b) + %add = fadd <2 x float> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i + ret <2 x float> %add +} + +define <8 x half> @foo16x8_rot(<8 x half> %a, <8 x half> %b) { +entry: +; CHECK-LABEL: foo16x8_rot +; CHECK-DAG: vcadd.f16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #90 +; CHECK-DAG: vcadd.f16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270 + %vcaddq_rot90_v2.i = tail call <8 x half> @llvm.arm.neon.vcadd.rot90.v8f16(<8 x half> %a, <8 x half> %b) + %vcaddq_rot270_v2.i = tail call <8 x half> @llvm.arm.neon.vcadd.rot270.v8f16(<8 x half> %a, <8 x half> %b) + %add = fadd <8 x half> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i + ret <8 x half> %add +} + +define <4 x float> @foo32x4_rot(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: foo32x4_rot +; CHECK-DAG: vcadd.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #90 +; CHECK-DAG: vcadd.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270 + %vcaddq_rot90_v2.i = tail call <4 x float> @llvm.arm.neon.vcadd.rot90.v4f32(<4 x float> %a, <4 x float> %b) + %vcaddq_rot270_v2.i = tail call <4 x float> @llvm.arm.neon.vcadd.rot270.v4f32(<4 x float> %a, <4 x float> %b) + %add = fadd <4 x float> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i + ret <4 x float> %add +} + +declare <4 x half> @llvm.arm.neon.vcadd.rot90.v4f16(<4 x half>, <4 x half>) +declare <4 x half> @llvm.arm.neon.vcadd.rot270.v4f16(<4 x half>, <4 x half>) +declare <2 x float> @llvm.arm.neon.vcadd.rot90.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.arm.neon.vcadd.rot270.v2f32(<2 x float>, <2 x float>) +declare <8 x half> @llvm.arm.neon.vcadd.rot90.v8f16(<8 x half>, <8 x half>) +declare <8 x half> @llvm.arm.neon.vcadd.rot270.v8f16(<8 x half>, <8 x half>) +declare <4 x float> @llvm.arm.neon.vcadd.rot90.v4f32(<4 x float>, <4 x float>) +declare <4 x float> @llvm.arm.neon.vcadd.rot270.v4f32(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/ARM/neon-vqaddsub-upgrade.ll 
b/llvm/test/CodeGen/ARM/neon-vqaddsub-upgrade.ll new file mode 100644 index 0000000000000..a1323810151a5 --- /dev/null +++ b/llvm/test/CodeGen/ARM/neon-vqaddsub-upgrade.ll @@ -0,0 +1,330 @@ +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s + +define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: vqadds8: +;CHECK: vqadd.s8 + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: vqadds16: +;CHECK: vqadd.s16 + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vqadds32: +;CHECK: vqadd.s32 + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vqadds64: +;CHECK: vqadd.s64 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: vqaddu8: +;CHECK: vqadd.u8 + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: vqaddu16: +;CHECK: vqadd.u16 + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vqaddu32: +;CHECK: vqadd.u32 + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vqaddu64: +;CHECK: vqadd.u64 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: vqaddQs8: +;CHECK: vqadd.s8 + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: vqaddQs16: +;CHECK: vqadd.s16 + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: vqaddQs32: +;CHECK: vqadd.s32 + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x 
i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: vqaddQs64: +;CHECK: vqadd.s64 + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: vqaddQu8: +;CHECK: vqadd.u8 + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: vqaddQu16: +;CHECK: vqadd.u16 + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: vqaddQu32: +;CHECK: vqadd.u32 + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: vqaddQu64: +;CHECK: vqadd.u64 + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + + +define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: vqsubs8: +;CHECK: vqsub.s8 + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: vqsubs16: +;CHECK: vqsub.s16 + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vqsubs32: +;CHECK: vqsub.s32 + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vqsubs64: +;CHECK: vqsub.s64 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: vqsubu8: +;CHECK: vqsub.u8 + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: vqsubu16: +;CHECK: vqsub.u16 + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vqsubu32: +;CHECK: vqsub.u32 + %tmp1 = load <2 x i32>, <2 
x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vqsubu64: +;CHECK: vqsub.u64 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: vqsubQs8: +;CHECK: vqsub.s8 + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: vqsubQs16: +;CHECK: vqsub.s16 + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: vqsubQs32: +;CHECK: vqsub.s32 + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: vqsubQs64: +;CHECK: vqsub.s64 + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: vqsubQu8: +;CHECK: vqsub.u8 + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: vqsubQu16: +;CHECK: vqsub.u16 + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: vqsubQu32: +;CHECK: vqsub.u32 + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: vqsubQu64: +;CHECK: vqsub.u64 + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x 
i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll b/llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll deleted file mode 100644 index 0b6fd7443af29..0000000000000 --- a/llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll +++ /dev/null @@ -1,44 +0,0 @@ -; Using VLAs(Variable Length Arrays) in a function will use R6 to keep track -; of the stack frame, and also spill/restore R6 to the stack. -; This tests that using -ffixed-r6 (-mattr=+reserve-r6) will stop R6 -; being used and also stop it being spilled/restored to the stack. 
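For context, a rough C equivalent of the deleted test body below, in the style of the "Equivalent C source code" comments carried by the neighboring reg-alloc tests. This is a sketch only and was not part of the original file: the function name f matches the IR; the variable names i and vla are illustrative.

    // Sketch: a VLA whose size is only known at run time. Clang lowers this
    // to llvm.stacksave + a dynamic alloca + llvm.stackrestore, so the frame
    // needs a base register (R6 on ARM) unless R6 is reserved.
    void f(void) {
      int i = 0;
      double vla[i];
      (void)vla;
    }
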
-; RUN: llc < %s -mcpu=cortex-m0 -mtriple=thumbv7-arm-none-eabi | FileCheck %s --check-prefix=CHECK-STATIC --check-prefix=CHECK-R6 -; RUN: llc < %s -mcpu=cortex-m0 -mtriple=thumbv7-arm-none-eabi -mattr=+reserve-r6 | FileCheck %s --check-prefix=CHECK-STATIC --check-prefix=CHECK-NO-R6 - -define void @f() #0 { -entry: - %i = alloca i32, align 4 - store i32 0, i32* %i, align 4 - - %saved_stack = alloca i8*, align 4 - %0 = call i8* @llvm.stacksave() - store i8* %0, i8** %saved_stack, align 4 - - %__vla_expr0 = alloca i32, align 4 - %1 = load i32, i32* %i, align 4 - %vla = alloca double, i32 %1, align 8 - store i32 %1, i32* %__vla_expr0, align 4 - - %2 = load i8*, i8** %saved_stack, align 4 - call void @llvm.stackrestore(i8* %2) - - ret void -} - -declare i8* @llvm.stacksave() #1 -declare void @llvm.stackrestore(i8* %ptr) #1 - -attributes #0 = { noinline nounwind "stackrealign" } -attributes #1 = { nounwind } - -; CHECK-STATIC: push {r4, -; CHECK-R6: r6 -; CHECK-NO-R6-NOT: r6 -; CHECK-STATIC: lr} -; CHECK-R6: r6 -; CHECK-NO-R6-NOT: r6 -; CHECK-STATIC: pop {r4, -; CHECK-R6: r6 -; CHECK-NO-R6-NOT: r6 -; CHECK-STATIC: pc} - diff --git a/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll b/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll deleted file mode 100644 index e2a4af87dde7e..0000000000000 --- a/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll +++ /dev/null @@ -1,63 +0,0 @@ -; RUN: llc < %s -mattr=+reserve-r6 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s -; -; Equivalent C source code -; register unsigned r6 asm("r6"); -; void bar(unsigned int i, -; unsigned int j, -; unsigned int k, -; unsigned int l, -; unsigned int m, -; unsigned int n, -; unsigned int o, -; unsigned int p) -; { -; r6 = 10; -; unsigned int result = i + j + k + l + m + n + o + p; -; } -declare void @llvm.write_register.i32(metadata, i32) nounwind - -define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { -entry: -; CHECK-NOT: push {{{.*}}r6,{{.*}}} -; CHECK: {{.*}}mov{{.*}}r6,{{.*}} -; CHECK-NOT: {{.*}}r6{{.*}} - %i.addr = alloca i32, align 4 - %j.addr = alloca i32, align 4 - %k.addr = alloca i32, align 4 - %l.addr = alloca i32, align 4 - %m.addr = alloca i32, align 4 - %n.addr = alloca i32, align 4 - %o.addr = alloca i32, align 4 - %p.addr = alloca i32, align 4 - %result = alloca i32, align 4 - store i32 %i, i32* %i.addr, align 4 - store i32 %j, i32* %j.addr, align 4 - store i32 %k, i32* %k.addr, align 4 - store i32 %l, i32* %l.addr, align 4 - store i32 %m, i32* %m.addr, align 4 - store i32 %n, i32* %n.addr, align 4 - store i32 %o, i32* %o.addr, align 4 - store i32 %p, i32* %p.addr, align 4 - call void @llvm.write_register.i32(metadata !0, i32 10) - %0 = load i32, i32* %i.addr, align 4 - %1 = load i32, i32* %j.addr, align 4 - %add = add i32 %0, %1 - %2 = load i32, i32* %k.addr, align 4 - %add1 = add i32 %add, %2 - %3 = load i32, i32* %l.addr, align 4 - %add2 = add i32 %add1, %3 - %4 = load i32, i32* %m.addr, align 4 - %add3 = add i32 %add2, %4 - %5 = load i32, i32* %n.addr, align 4 - %add4 = add i32 %add3, %5 - %6 = load i32, i32* %o.addr, align 4 - %add5 = add i32 %add4, %6 - %7 = load i32, i32* %p.addr, align 4 - %add6 = add i32 %add5, %7 - store i32 %add6, i32* %result, align 4 - ret void -} - -!llvm.named.register.r6 = !{!0} -!0 = !{!"r6"} - diff --git a/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll b/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll deleted file mode 100644 index 
3647c0701a7c3..0000000000000 --- a/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: llc < %s -mattr=+reserve-r6 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s -; -; Equivalent C source code -; void bar(unsigned int i, -; unsigned int j, -; unsigned int k, -; unsigned int l, -; unsigned int m, -; unsigned int n, -; unsigned int o, -; unsigned int p) -; { -; unsigned int result = i + j + k + l + m + n + o + p; -; } - -define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { -entry: -; CHECK-NOT: push {{{.*}}r6,{{.*}}} - %i.addr = alloca i32, align 4 - %j.addr = alloca i32, align 4 - %k.addr = alloca i32, align 4 - %l.addr = alloca i32, align 4 - %m.addr = alloca i32, align 4 - %n.addr = alloca i32, align 4 - %o.addr = alloca i32, align 4 - %p.addr = alloca i32, align 4 - %result = alloca i32, align 4 - store i32 %i, i32* %i.addr, align 4 - store i32 %j, i32* %j.addr, align 4 - store i32 %k, i32* %k.addr, align 4 - store i32 %l, i32* %l.addr, align 4 - store i32 %m, i32* %m.addr, align 4 - store i32 %n, i32* %n.addr, align 4 - store i32 %o, i32* %o.addr, align 4 - store i32 %p, i32* %p.addr, align 4 - %0 = load i32, i32* %i.addr, align 4 - %1 = load i32, i32* %j.addr, align 4 - %add = add i32 %0, %1 - %2 = load i32, i32* %k.addr, align 4 - %add1 = add i32 %add, %2 - %3 = load i32, i32* %l.addr, align 4 - %add2 = add i32 %add1, %3 - %4 = load i32, i32* %m.addr, align 4 - %add3 = add i32 %add2, %4 - %5 = load i32, i32* %n.addr, align 4 - %add4 = add i32 %add3, %5 - %6 = load i32, i32* %o.addr, align 4 - %add5 = add i32 %add4, %6 - %7 = load i32, i32* %p.addr, align 4 - %add6 = add i32 %add5, %7 - store i32 %add6, i32* %result, align 4 -; CHECK: {{.*}}r5{{.*}} -; CHECK-NOT: {{.*}}r6{{.*}} - ret void -; CHECK-NOT: pop {{{.*}}r6,{{.*}}} -} - diff --git a/llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll b/llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll deleted file mode 100644 index d1f020936a3d6..0000000000000 --- a/llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll +++ /dev/null @@ -1,58 +0,0 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s -; -; Equivalent C source code -; void bar(unsigned int i, -; unsigned int j, -; unsigned int k, -; unsigned int l, -; unsigned int m, -; unsigned int n, -; unsigned int o, -; unsigned int p) -; { -; unsigned int result = i + j + k + l + m + n + o + p; -; } - -define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { -entry: -; CHECK: push {{{.*}}r4, r5{{.*}}} - %i.addr = alloca i32, align 4 - %j.addr = alloca i32, align 4 - %k.addr = alloca i32, align 4 - %l.addr = alloca i32, align 4 - %m.addr = alloca i32, align 4 - %n.addr = alloca i32, align 4 - %o.addr = alloca i32, align 4 - %p.addr = alloca i32, align 4 - %result = alloca i32, align 4 - store i32 %i, i32* %i.addr, align 4 - store i32 %j, i32* %j.addr, align 4 - store i32 %k, i32* %k.addr, align 4 - store i32 %l, i32* %l.addr, align 4 - store i32 %m, i32* %m.addr, align 4 - store i32 %n, i32* %n.addr, align 4 - store i32 %o, i32* %o.addr, align 4 - store i32 %p, i32* %p.addr, align 4 - %0 = load i32, i32* %i.addr, align 4 - %1 = load i32, i32* %j.addr, align 4 - %add = add i32 %0, %1 - %2 = load i32, i32* %k.addr, align 4 - %add1 = add i32 %add, %2 - %3 = load i32, i32* %l.addr, align 4 - %add2 = add i32 %add1, %3 - %4 = load i32, i32* %m.addr, align 4 - %add3 = add i32 %add2, %4 - %5 = load i32, i32* 
%n.addr, align 4 - %add4 = add i32 %add3, %5 - %6 = load i32, i32* %o.addr, align 4 - %add5 = add i32 %add4, %6 - %7 = load i32, i32* %p.addr, align 4 - %add6 = add i32 %add5, %7 - store i32 %add6, i32* %result, align 4 -; CHECK: {{.*}}r4{{.*}} -; CHECK: {{.*}}r5{{.*}} - -; CHECK: pop {{{.*}}r4, r5{{.*}}} - ret void -} - diff --git a/llvm/test/CodeGen/ARM/vmul.ll b/llvm/test/CodeGen/ARM/vmul.ll index fcffe175e2bac..e8cf8d9b27b6f 100644 --- a/llvm/test/CodeGen/ARM/vmul.ll +++ b/llvm/test/CodeGen/ARM/vmul.ll @@ -574,7 +574,7 @@ for.body33: ; preds = %for.body33, %for.bo %vmovl.i225 = zext <8 x i8> undef to <8 x i16> %mul.i223 = mul <8 x i16> %vmovl.i249, %vmovl.i249 %vshl_n = shl <8 x i16> %mul.i223, - %vqsub2.i216 = tail call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> , <8 x i16> %vshl_n) nounwind + %vqsub2.i216 = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> , <8 x i16> %vshl_n) nounwind %mul.i209 = mul <8 x i16> undef, %vshr_n130 = lshr <8 x i16> undef, %vshr_n134 = lshr <8 x i16> %mul.i209, @@ -608,7 +608,7 @@ for.end179: ; preds = %for.cond.loopexit, } declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone ; vmull lowering would create a zext(v4i8 load()) instead of a zextload(v4i8), diff --git a/llvm/test/CodeGen/ARM/vqadd.ll b/llvm/test/CodeGen/ARM/vqadd.ll index d1e90cb209449..47432c7b732d6 100644 --- a/llvm/test/CodeGen/ARM/vqadd.ll +++ b/llvm/test/CodeGen/ARM/vqadd.ll @@ -5,7 +5,7 @@ define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vqadd.s8 %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vqadd.s16 %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vqadd.s32 %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -32,7 +32,7 @@ define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: vqadd.s64 %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B - %tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + %tmp3 = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -41,7 +41,7 @@ define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vqadd.u8 %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -50,7 +50,7 @@ define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x 
i16>* %B) nounwind { ;CHECK: vqadd.u16 %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -59,7 +59,7 @@ define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vqadd.u32 %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -68,7 +68,7 @@ define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: vqadd.u64 %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B - %tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + %tmp3 = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -77,7 +77,7 @@ define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: vqadd.s8 %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -86,7 +86,7 @@ define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vqadd.s16 %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -95,7 +95,7 @@ define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: vqadd.s32 %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -104,7 +104,7 @@ define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: vqadd.s64 %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -113,7 +113,7 @@ define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: vqadd.u8 %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -122,7 +122,7 @@ define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vqadd.u16 %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -131,7 +131,7 @@ define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: vqadd.u32 %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x 
i32> @llvm.uadd.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -140,26 +140,26 @@ define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: vqadd.u64 %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/llvm/test/CodeGen/ARM/vqdmul.ll b/llvm/test/CodeGen/ARM/vqdmul.ll index 6da080012a1e6..fa938d45becfb 100644 --- a/llvm/test/CodeGen/ARM/vqdmul.ll +++ b/llvm/test/CodeGen/ARM/vqdmul.ll @@ -204,7 +204,7 @@ define <4 x i32> @vqdmlals16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, 
<4 x i16> %tmp3) - %tmp5 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) + %tmp5 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -215,7 +215,7 @@ define <2 x i64> @vqdmlals32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3) - %tmp5 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) + %tmp5 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -225,7 +225,7 @@ entry: ; CHECK: vqdmlal.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> ; <<4 x i16>> [#uses=1] %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg1_int16x4_t, <4 x i16> %0) - %2 = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1) ret <4 x i32> %2 } @@ -235,12 +235,12 @@ entry: ; CHECK: vqdmlal.s32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> ; <<2 x i32>> [#uses=1] %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg1_int32x2_t, <2 x i32> %0) - %2 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1) + %2 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1) ret <2 x i64> %2 } -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vqdmlsls16_natural: @@ -249,7 +249,7 @@ define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3) - %tmp5 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) + %tmp5 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -260,7 +260,7 @@ define <2 x i64> @vqdmlsls32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3) - %tmp5 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) + %tmp5 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -270,7 +270,7 @@ entry: ; CHECK: vqdmlsl.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> ; <<4 x i16>> [#uses=1] %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg1_int16x4_t, <4 x i16> %0) - %2 = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1) ret <4 x i32> %2 } @@ -280,9 +280,9 @@ entry: ; CHECK: vqdmlsl.s32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> ; <<2 x i32>> [#uses=1] %1 = 
tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg1_int32x2_t, <2 x i32> %0) - %2 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1) + %2 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1) ret <2 x i64> %2 } -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/llvm/test/CodeGen/ARM/vqsub.ll b/llvm/test/CodeGen/ARM/vqsub.ll index 40963ce824864..9864f6421cb3d 100644 --- a/llvm/test/CodeGen/ARM/vqsub.ll +++ b/llvm/test/CodeGen/ARM/vqsub.ll @@ -5,7 +5,7 @@ define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vqsub.s8 %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vqsub.s16 %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vqsub.s32 %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -32,7 +32,7 @@ define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: vqsub.s64 %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B - %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + %tmp3 = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -41,7 +41,7 @@ define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vqsub.u8 %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -50,7 +50,7 @@ define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vqsub.u16 %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -59,7 +59,7 @@ define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vqsub.u32 %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -68,7 +68,7 @@ define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: vqsub.u64 %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B - %tmp3 = call <1 
x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + %tmp3 = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -77,7 +77,7 @@ define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: vqsub.s8 %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -86,7 +86,7 @@ define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vqsub.s16 %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -95,7 +95,7 @@ define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: vqsub.s32 %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -104,7 +104,7 @@ define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: vqsub.s64 %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -113,7 +113,7 @@ define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: vqsub.u8 %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -122,7 +122,7 @@ define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vqsub.u16 %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -131,7 +131,7 @@ define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: vqsub.u32 %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -140,26 +140,26 @@ define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: vqsub.u64 %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8>, <8 x i8>) nounwind 
readnone +declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.usub.sat.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.usub.sat.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/llvm/test/CodeGen/BPF/CORE/no-elf-ama-symbol.ll b/llvm/test/CodeGen/BPF/CORE/no-elf-ama-symbol.ll new file mode 100644 index 0000000000000..8851c502b6f0b --- /dev/null +++ b/llvm/test/CodeGen/BPF/CORE/no-elf-ama-symbol.ll @@ -0,0 +1,65 @@ +; RUN: llc -march=bpfel -filetype=obj -o - %s | llvm-readelf -s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=obj -o - %s | llvm-readelf -s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfel -filetype=obj -addrsig -o - %s | llvm-readelf -s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=obj -addrsig -o - %s | llvm-readelf -s | FileCheck -check-prefixes=CHECK %s +; +; Source Code: +; struct tt { int a; } __attribute__((preserve_access_index)); +; int test(struct tt *arg) { +; return arg->a; +; } +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t.c + +%struct.tt = type { i32 } + +; Function Attrs: nounwind readonly +define dso_local i32 @test(%struct.tt* readonly %arg) local_unnamed_addr #0 !dbg !7 { +entry: + call void @llvm.dbg.value(metadata %struct.tt* %arg, metadata !16, metadata !DIExpression()), !dbg !17 + %0 = tail call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.tts(%struct.tt* %arg, i32 0, i32 0), !dbg !18, !llvm.preserve.access.index !12 + %1 = 
load i32, i32* %0, align 4, !dbg !18, !tbaa !19 + ret i32 %1, !dbg !24 +} + +; CHECK-NOT: llvm.tt:0:0$0:0 + +; Function Attrs: nounwind readnone +declare i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.tts(%struct.tt*, i32, i32) #1 + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.value(metadata, metadata, metadata) #2 + +attributes #0 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readnone speculatable} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 (https://github.com/llvm/llvm-project.git 947f9692440836dcb8d88b74b69dd379d85974ce)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/bug") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project.git 947f9692440836dcb8d88b74b69dd379d85974ce)"} +!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !11} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "tt", file: !1, line: 1, size: 32, elements: !13) +!13 = !{!14} +!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !12, file: !1, line: 1, baseType: !10, size: 32) +!15 = !{!16} +!16 = !DILocalVariable(name: "arg", arg: 1, scope: !7, file: !1, line: 2, type: !11) +!17 = !DILocation(line: 0, scope: !7) +!18 = !DILocation(line: 3, column: 15, scope: !7) +!19 = !{!20, !21, i64 0} +!20 = !{!"tt", !21, i64 0} +!21 = !{!"int", !22, i64 0} +!22 = !{!"omnipotent char", !23, i64 0} +!23 = !{!"Simple C/C++ TBAA"} +!24 = !DILocation(line: 3, column: 3, scope: !7) diff --git a/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir new file mode 100644 index 0000000000000..94c69f1be36a6 --- /dev/null +++ b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir @@ -0,0 +1,61 @@ +# RUN: llc -run-pass mir-namer -x mir -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -run-pass mir-canonicalizer -x mir -verify-machineinstrs %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + define i32 @_Z1fi(i32 %arg) { + %tmp = alloca i32, align 4 + %tmp1 = alloca i32, align 4 + ret i32 %arg + } + +... 
+--- +name: _Z1fi +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: _, preferred-register: '' } + - { id: 3, class: _, preferred-register: '' } + - { id: 4, class: _, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } + - { id: 6, class: _, preferred-register: '' } + - { id: 7, class: _, preferred-register: '' } + - { id: 8, class: _, preferred-register: '' } +stack: + - { id: 0, name: tmp, type: default, offset: 0, size: 4, alignment: 4 } + - { id: 1, name: tmp1, type: default, offset: 0, size: 4, alignment: 4 } +body: | + bb.0: + %tmp0:_(s32) = COPY $edi + %tmp1:_(s32) = G_CONSTANT i32 0 + %tmp5:_(p0) = G_FRAME_INDEX %stack.0.tmp + %tmp6:_(p0) = G_FRAME_INDEX %stack.1.tmp1 + G_STORE %tmp0(s32), %tmp5(p0) :: (store 4 into %ir.tmp) + %tmp7:_(s32) = G_LOAD %tmp5(p0) :: (load 4 from %ir.tmp) + %tmp8:_(s1) = G_ICMP intpred(ne), %tmp7(s32), %tmp1 + G_BRCOND %tmp8(s1), %bb.1 + G_BR %bb.2 + + ; CHECK: bb.1: + ; CHECK: %bb2_{{[0-9]+}}__1:_(s32) = G_CONSTANT + bb.1: + %tmp4:_(s32) = G_CONSTANT i32 1 + G_STORE %tmp4(s32), %tmp6(p0) :: (store 4 into %ir.tmp1) + G_BR %bb.3 + + + ; CHECK: bb.2: + ; CHECK: %bb1_{{[0-9]+}}__1:_(s32) = G_CONSTANT + bb.2: + %tmp3:_(s32) = G_CONSTANT i32 2 + G_STORE %tmp3(s32), %tmp6(p0) :: (store 4 into %ir.tmp1) + + ; CHECK: bb.3: + ; CHECK: %bb3_{{[0-9]+}}__1:_(s32) = G_LOAD + bb.3: + %tmp9:_(s32) = G_LOAD %tmp6(p0) :: (load 4 from %ir.tmp1) + $eax = COPY %tmp9(s32) + RET 0, implicit $eax + +... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir index 5a805af18ded3..e30870b73635d 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir @@ -131,10 +131,9 @@ tracksRegLiveness: true body: | bb.1.entry: ; MIPS32-LABEL: name: i1_true - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; MIPS32: $v0 = COPY [[AND]](s32) ; MIPS32: RetRA implicit $v0 %0:_(s1) = G_CONSTANT i1 true diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir index 59d4280e1ba88..057abae4d8198 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir @@ -150,14 +150,13 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY4]], [[COPY5]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[XOR]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY2]], [[COPY3]] ; MIPS32: $v0 = COPY [[SELECT]](s32) ; MIPS32: RetRA implicit $v0 diff 
--git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/constants.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/constants.ll index bdafe26491a34..0187b72780d7f 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/constants.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/constants.ll @@ -71,7 +71,7 @@ entry: define zeroext i1 @i1_true() { ; MIPS32-LABEL: i1_true: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 65535 +; MIPS32-NEXT: ori $1, $zero, 1 ; MIPS32-NEXT: andi $2, $1, 1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll index 8f559633c9569..58d5c8a160a6b 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll @@ -15,7 +15,7 @@ entry: define i1 @true_s(float %x, float %y) { ; MIPS32-LABEL: true_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $2, $zero, 65535 +; MIPS32-NEXT: ori $2, $zero, 1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -233,7 +233,7 @@ entry: define i1 @true_d(double %x, double %y) { ; MIPS32-LABEL: true_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $2, $zero, 65535 +; MIPS32-NEXT: ori $2, $zero, 1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll index c127d1208919f..71c3023ca153f 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll @@ -56,8 +56,9 @@ entry: define i32 @select_with_negation(i32 %a, i32 %b, i32 %x, i32 %y) { ; MIPS32-LABEL: select_with_negation: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: slt $1, $4, $5 -; MIPS32-NEXT: not $1, $1 +; MIPS32-NEXT: ori $1, $zero, 1 +; MIPS32-NEXT: slt $2, $4, $5 +; MIPS32-NEXT: xor $1, $2, $1 ; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: movn $7, $6, $1 ; MIPS32-NEXT: move $2, $7 diff --git a/llvm/test/CodeGen/Mips/delay-slot-filler-bundled-insts.mir b/llvm/test/CodeGen/Mips/delay-slot-filler-bundled-insts.mir new file mode 100644 index 0000000000000..1539bb5f73e91 --- /dev/null +++ b/llvm/test/CodeGen/Mips/delay-slot-filler-bundled-insts.mir @@ -0,0 +1,153 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +## Check that the delay-slot filler does not attempt to split BUNDLE instructions +# RUN: llc %s -start-before=mips-delay-slot-filler -stop-after=mips-delay-slot-filler \ +# RUN: -verify-machineinstrs -o - | FileCheck %s +## Check that we can emit assembly for input with BUNDLE instructions: +# RUN: llc %s -start-before=mips-delay-slot-filler -verify-machineinstrs -o - | FileCheck %s -check-prefix ASM + +# ASM: # %bb.0: +# ASM-NEXT: daddiu $sp, $sp, -16 +# ASM-NEXT: sd $ra, 8($sp) +## BUNDLE should be emitted in order: +# ASM-NEXT: daddiu $sp, $sp, -16 +# ASM-NEXT: daddiu $sp, $sp, 16 +# ASM-NEXT: beqz $4, .LBB0_2 +# ASM-NEXT: nop +--- | + target datalayout = "E-m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128" + target triple = "mips64-unknown-freebsd" + declare i8* @func_a(i64 zeroext) + declare i8* @func_b(i64 zeroext) + ; Function Attrs: nounwind + define i8* @test(i64 zeroext %nbytes) local_unnamed_addr #0 { + entry: + %cmp = icmp eq i64 %nbytes, 0 + br i1 %cmp, label %if.else, label %if.then + + if.then: ; preds = %entry + %call = tail call i8* @func_a(i64 zeroext %nbytes) + br label %return + + if.else: ; preds = %entry + %call1 = tail call i8* @func_b(i64 zeroext 0) + br label %return + + return: ; preds = 
%if.else, %if.then + %retval.0 = phi i8* [ %call, %if.then ], [ %call1, %if.else ] + ret i8* %retval.0 + } + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #0 + + attributes #0 = { nounwind } + +... +--- +name: test +alignment: 8 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$a0_64', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$ra_64', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: test + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK: $sp_64 = DADDiu $sp_64, -16 + ; CHECK: CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK: SD killed $ra_64, $sp_64, 8 :: (store 8 into %stack.0) + ; CHECK: CFI_INSTRUCTION offset $ra_64, -8 + ; CHECK: BUNDLE { + ; CHECK: $sp_64 = DADDiu $sp_64, -16 + ; CHECK: $sp_64 = DADDiu $sp_64, 16 + ; CHECK: } + ; CHECK: BEQ64 renamable $a0_64, $zero_64, %bb.2, implicit-def $at { + ; CHECK: NOP + ; CHECK: } + ; CHECK: bb.1.if.then: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: JAL @func_a, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit-def $sp, implicit-def $v0_64 { + ; CHECK: NOP + ; CHECK: } + ; CHECK: J %bb.3, implicit-def dead $at { + ; CHECK: NOP + ; CHECK: } + ; CHECK: bb.2.if.else: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: JAL @func_b, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit-def $sp, implicit-def $v0_64 { + ; CHECK: $a0_64 = DADDiu $zero_64, 0 + ; CHECK: } + ; CHECK: bb.3.return: + ; CHECK: $ra_64 = LD $sp_64, 8 :: (load 8 from %stack.0) + ; CHECK: PseudoReturn64 undef $ra_64, implicit $v0_64 { + ; CHECK: $sp_64 = DADDiu $sp_64, 16 + ; CHECK: } + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $a0_64, $ra_64 + + $sp_64 = DADDiu $sp_64, -16 + CFI_INSTRUCTION def_cfa_offset 16 + SD killed $ra_64, $sp_64, 8 :: (store 8 into %stack.0) + CFI_INSTRUCTION offset $ra_64, -8 + ; This BUNDLE instruction must not be split by the delay slot filler: + BUNDLE { + $sp_64 = DADDiu $sp_64, -16 + $sp_64 = DADDiu $sp_64, 16 + } + BEQ64 renamable $a0_64, $zero_64, %bb.2, implicit-def $at + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $a0_64 + + JAL @func_a, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit-def $sp, implicit-def $v0_64 + J %bb.3, implicit-def dead $at + + bb.2.if.else: + successors: %bb.3(0x80000000) + + $a0_64 = DADDiu $zero_64, 0 + JAL @func_b, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit-def $sp, implicit-def $v0_64 + + bb.3.return: + liveins: $v0_64 + + $ra_64 = LD $sp_64, 8 :: (load 8 from %stack.0) + $sp_64 = DADDiu $sp_64, 16 + PseudoReturn64 undef $ra_64, implicit $v0_64 + +... 
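For context on what the test above guards against: the MIPS delay-slot filler hoists a single independent instruction into the slot behind a branch, and a BUNDLE is atomic, so when the only candidate sits inside a bundle the filler must fall back to a NOP rather than peel an instruction out of it. A hand-written sketch of the two outcomes (illustrative assembly, not taken from the test):

        # Slot filled: a lone, independent instruction is hoisted behind the branch.
        beqz   $4, .LBB0_2
        daddiu $sp, $sp, -16   # moved into the delay slot by the filler

        # Slot not fillable: the candidate lives inside a BUNDLE, which must stay
        # intact, so a NOP is emitted in the slot instead (as the ASM checks expect).
        beqz   $4, .LBB0_2
        nop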
diff --git a/llvm/test/CodeGen/PowerPC/addi-licm.ll b/llvm/test/CodeGen/PowerPC/addi-licm.ll index e0314d19bd3f1..24c9805f1343d 100644 --- a/llvm/test/CodeGen/PowerPC/addi-licm.ll +++ b/llvm/test/CodeGen/PowerPC/addi-licm.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -disable-ppc-preinc-prep < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -disable-ppc-instr-form-prep < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PIP target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/PowerPC/aix-func-dsc-gen.ll b/llvm/test/CodeGen/PowerPC/aix-func-dsc-gen.ll index 217f687e8b086..29b2d4c454c4e 100644 --- a/llvm/test/CodeGen/PowerPC/aix-func-dsc-gen.ll +++ b/llvm/test/CodeGen/PowerPC/aix-func-dsc-gen.ll @@ -103,7 +103,7 @@ entry: ; CHECK-NEXT: SectionLen: 0 ; CHECK-NEXT: ParameterHashIndex: 0x0 ; CHECK-NEXT: TypeChkSectNum: 0x0 -; CHECK-NEXT: SymbolAlignmentLog2: 0 +; CHECK-NEXT: SymbolAlignmentLog2: 2 ; CHECK-NEXT: SymbolType: XTY_SD (0x1) ; CHECK-NEXT: StorageMappingClass: XMC_TC0 (0xF) ; CHECK-NEXT: StabInfoIndex: 0x0 diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll b/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll index 2d6353876a331..b4b9f029ed0bb 100644 --- a/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll +++ b/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll @@ -69,4 +69,4 @@ __here: ; 64LARGE-ASM: ld [[REG2:[0-9]+]], LC0@l([[REG1]]) ; CHECK: .toc -; CHECK-NOT: .tc +; CHECK: .tc Ltmp0[TC],Ltmp0 diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll b/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll index 8803a1e4569fb..1db8a55fb28e5 100644 --- a/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll +++ b/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll @@ -84,4 +84,4 @@ entry: ; 64LARGE-ASM: blr ; CHECK: .toc -; CHECK-NOT: .tc +; CHECK: .tc .LCPI0_0[TC],.LCPI0_0 diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll b/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll index 5efb956b1529d..a5ec1942a3157 100644 --- a/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll +++ b/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll @@ -185,4 +185,4 @@ ; 64LARGE-ASM: .long LBB0_5-.LJTI0_0 ; CHECK: .toc -; CHECK-NOT: .tc +; CHECK: .tc .LJTI0_0[TC],.LJTI0_0 diff --git a/llvm/test/CodeGen/PowerPC/aix-weak-undef-func-call.ll b/llvm/test/CodeGen/PowerPC/aix-weak-undef-func-call.ll new file mode 100644 index 0000000000000..9fb3dec19edf2 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-weak-undef-func-call.ll @@ -0,0 +1,30 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -filetype=obj -o %t.o < %s +; RUN: llvm-readobj --symbols %t.o | FileCheck %s + +define void @bar() { +entry: + call void bitcast (void (...)* @foo to void ()*)() + ret void +} + +declare extern_weak void @foo(...) 
+ +;CHECK: Symbol { +;CHECK: Name: .foo +;CHECK-NEXT: Value (RelocatableAddress): 0x0 +;CHECK-NEXT: Section: N_UNDEF +;CHECK-NEXT: Type: 0x0 +;CHECK-NEXT: StorageClass: C_WEAKEXT (0x6F) +;CHECK-NEXT: NumberOfAuxEntries: 1 +;CHECK-NEXT: CSECT Auxiliary Entry { +;CHECK: SectionLen: 0 +;CHECK-NEXT: ParameterHashIndex: 0x0 +;CHECK-NEXT: TypeChkSectNum: 0x0 +;CHECK-NEXT: SymbolAlignmentLog2: 0 +;CHECK-NEXT: SymbolType: XTY_ER (0x0) +;CHECK-NEXT: StorageMappingClass: XMC_PR (0x0) +;CHECK-NEXT: StabInfoIndex: 0x0 +;CHECK-NEXT: StabSectNum: 0x0 +;CHECK-NEXT: } +;CHECK-NEXT: } + diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-data-only-notoc.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-data-only-notoc.ll new file mode 100644 index 0000000000000..d6e772ffc928e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-data-only-notoc.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s + +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -filetype=obj -o %t.o < %s +; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYMS %s + +; RUN: not llc -mtriple powerpc64-ibm-aix-xcoff -filetype=obj < %s 2>&1 | \ +; RUN: FileCheck --check-prefix=OBJ64 %s +; OBJ64: LLVM ERROR: 64-bit XCOFF object files are not supported yet. + +@a = external global i32, align 4 +@b = external global i64, align 8 +@c = external global i16, align 2 +@globa = common global i32 0, align 4 + +@ptr = internal global void (...)* null, align 4 + +; CHECK-NOT: .toc +; SYMS-NOT: Name: TOC diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-toc.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-toc.ll new file mode 100644 index 0000000000000..57f97064b5c9a --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-toc.ll @@ -0,0 +1,214 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck --check-prefixes CHECK,CHECK32 %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck --check-prefixes CHECK,CHECK64 %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff -filetype=obj -o %t.o < %s +; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s + +; RUN: not llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff -filetype=obj -o %t.o 2>&1 \ +; RUN: < %s | FileCheck --check-prefix=XCOFF64 %s +; XCOFF64: LLVM ERROR: 64-bit XCOFF object files are not supported yet. 
+ +@a = external global i32, align 4 +@b = external global i64, align 8 +@c = external global i16, align 2 +@globa = common global i32 0, align 4 + +@ptr = internal global void (...)* null, align 4 + +declare void @foo() + +define void @bar() { + %1 = alloca i8*, align 8 + store i32 0, i32* @a, align 4 + store i64 0, i64* @b, align 8 + store i16 0, i16* @c, align 2 + store i32 0, i32* @globa, align 4 + store void (...)* bitcast (void ()* @bar to void (...)*), void (...)** @ptr, align 4 + store i8* bitcast (void ()* @foo to i8*), i8** %1, align 8 + ret void +} + +; CHECK-NOT: .comm a +; CHECK-NOT: .lcomm a +; CHECK-NOT: .comm b +; CHECK-NOT: .lcomm b +; CHECK-NOT: .comm c +; CHECK-NOT: .lcomm c +; CHECK: .comm globa[RW],4,2 +; CHECK32: .lcomm ptr,4,ptr[BS],2 +; CHECK64: .lcomm ptr,8,ptr[BS],2 +; CHECK: .toc +; CHECK-NEXT: LC0: +; CHECK-NEXT: .tc a[TC],a[UA] +; CHECK-NEXT: LC1: +; CHECK-NEXT: .tc b[TC],b[UA] +; CHECK-NEXT: LC2: +; CHECK-NEXT: .tc c[TC],c[UA] +; CHECK-NEXT: LC3: +; CHECK-NEXT: .tc globa[TC],globa[RW] +; CHECK-NEXT: LC4: +; CHECK-NEXT: .tc ptr[TC],ptr[BS] +; CHECK-NEXT: LC5: +; CHECK-NEXT: .tc bar[TC],bar[DS] +; CHECK-NEXT: LC6: +; CHECK-NEXT: .tc foo[TC],foo[DS] + +; SYM: File: {{.*}}aix-xcoff-toc.ll.tmp.o +; SYM: Symbol {{[{][[:space:]] *}}Index: [[#INDX:]]{{[[:space:]] *}}Name: TOC +; SYM-NEXT: Value (RelocatableAddress): 0x54 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: [[#INDX+1]] +; SYM-NEXT: SectionLen: 0 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TC0 (0xF) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM-NEXT: Symbol { +; SYM-NEXT: Index: [[#INDX+2]] +; SYM-NEXT: Name: a +; SYM-NEXT: Value (RelocatableAddress): 0x54 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: [[#INDX+3]] +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM-NEXT: Symbol { +; SYM-NEXT: Index: [[#INDX+4]] +; SYM-NEXT: Name: b +; SYM-NEXT: Value (RelocatableAddress): 0x58 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: [[#INDX+5]] +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM-NEXT: Symbol { +; SYM-NEXT: Index: [[#INDX+6]] +; SYM-NEXT: Name: c +; SYM-NEXT: Value (RelocatableAddress): 0x5C +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: [[#INDX+7]] +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; 
SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM-NEXT: Symbol { +; SYM-NEXT: Index: [[#INDX+8]] +; SYM-NEXT: Name: globa +; SYM-NEXT: Value (RelocatableAddress): 0x60 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: [[#INDX+9]] +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM-NEXT: Symbol { +; SYM-NEXT: Index: [[#INDX+10]] +; SYM-NEXT: Name: ptr +; SYM-NEXT: Value (RelocatableAddress): 0x64 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: [[#INDX+11]] +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM-NEXT: Symbol { +; SYM-NEXT: Index: [[#INDX+12]] +; SYM-NEXT: Name: bar +; SYM-NEXT: Value (RelocatableAddress): 0x68 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: [[#INDX+13]] +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM-NEXT: Symbol { +; SYM-NEXT: Index: [[#INDX+14]] +; SYM-NEXT: Name: foo +; SYM-NEXT: Value (RelocatableAddress): 0x6C +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: [[#INDX+15]] +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } diff --git a/llvm/test/CodeGen/PowerPC/aix_cc_abi.ll b/llvm/test/CodeGen/PowerPC/aix_cc_abi.ll new file mode 100644 index 0000000000000..b15b63b166f70 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix_cc_abi.ll @@ -0,0 +1,614 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,32BIT %s + +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,64BIT %s + +define void @call_test_chars() { +entry: + call i8 @test_chars(i8 signext 97, i8 signext 97, i8 signext 97, i8 signext 97) + ret void +} + +; CHECK-LABEL: name: call_test_chars + +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: $r3 = LI 97 +; 32BIT: 
$r4 = LI 97 +; 32BIT: $r5 = LI 97 +; 32BIT: $r6 = LI 97 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: $x3 = LI8 97 +; 64BIT: $x4 = LI8 97 +; 64BIT: $x5 = LI8 97 +; 64BIT: $x6 = LI8 97 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define signext i8 @test_chars(i8 signext %c1, i8 signext %c2, i8 signext %c3, i8 signext %c4) { +entry: + %conv = sext i8 %c1 to i32 + %conv1 = sext i8 %c2 to i32 + %add = add nsw i32 %conv, %conv1 + %conv2 = sext i8 %c3 to i32 + %add3 = add nsw i32 %add, %conv2 + %conv4 = sext i8 %c4 to i32 + %add5 = add nsw i32 %add3, %conv4 + %conv6 = trunc i32 %add5 to i8 + ret i8 %conv6 +} + +; CHECK-LABEL: name: test_chars + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT: body: +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 + +define void @call_test_chars_mix() { +entry: + call i8 @test_chars_mix(i8 signext 97, i8 zeroext -31, i8 zeroext 97, i8 signext -31) + ret void +} + +; CHECK-LABEL: name: call_test_chars_mix + +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: $r3 = LI 97 +; 32BIT: $r4 = LI 225 +; 32BIT: $r5 = LI 97 +; 32BIT: $r6 = LI -31 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: $x3 = LI8 97 +; 64BIT: $x4 = LI8 225 +; 64BIT: $x5 = LI8 97 +; 64BIT: $x6 = LI8 -31 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define signext i8 @test_chars_mix(i8 signext %c1, i8 zeroext %c2, i8 zeroext %c3, i8 signext %c4) { +entry: + %conv = sext i8 %c1 to i32 + %conv1 = zext i8 %c2 to i32 + %add = add nsw i32 %conv, %conv1 + %conv2 = zext i8 %c3 to i32 + %add3 = add nsw i32 %add, %conv2 + %conv4 = sext i8 %c4 to i32 + %add5 = add nsw i32 %add3, %conv4 + %conv6 = trunc i32 %add5 to i8 + ret i8 %conv6 +} + +; CHECK-LABEL: name: test_chars_mix + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT: body: +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: 
'$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 + +@global_i1 = global i8 0, align 1 + +define void @test_i1(i1 %b) { + entry: + %frombool = zext i1 %b to i8 + store i8 %frombool, i8* @global_i1, align 1 + ret void +} + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3 +; 32BIT: renamable $r3 = RLWINM killed renamable $r3, 0, 31, 31 +; 32BIT-NEXT: STB killed renamable $r3, 0, killed renamable $r4 :: (store 1 into @global_i1) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; 64BIT: renamable $r[[REG1:[0-9]+]] = RLWINM renamable $r[[REG1]], 0, 31, 31, implicit killed $x3 +; 64BIT-NEXT: STB killed renamable $r[[REG1]], 0, killed renamable $x4 :: (store 1 into @global_i1) + +define void @call_test_i1() { +entry: + call void @test_i1(i1 1) + ret void +} + +; CHECK-LABEL: name: call_test_i1 + +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: $r3 = LI 1 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: $x3 = LI8 1 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define void @test_i1zext(i1 zeroext %b) { + entry: + %frombool = zext i1 %b to i8 + store i8 %frombool, i8 * @global_i1, align 1 + ret void + } + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3 +; CHECK-NOT: RLWINM +; 32BIT: STB killed renamable $r3, 0, killed renamable $r4 :: (store 1 into @global_i1) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; CHECK-NOT: RLWINM +; 64BIT: STB8 killed renamable $x3, 0, killed renamable $x4 :: (store 1 into @global_i1) + +define i32 @test_ints(i32 signext %a, i32 zeroext %b, i32 zeroext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h) { +entry: + %add = add i32 %a, %b + %add1 = add i32 %add, %c + %add2 = add i32 %add1, %d + %add3 = add i32 %add2, %e + %add4 = add i32 %add3, %f + %add5 = add i32 %add4, %g + %add6 = add i32 %add5, %h + ret i32 %add6 +} + +; CHECK-LABEL: name: test_ints + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r8', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r9', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r10', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: 
'' } +; 64BIT-NEXT: - { reg: '$x7', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x8', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x9', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x10', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 + +define void @call_test_ints() { +entry: + call i32 @test_ints(i32 signext 1, i32 zeroext 1, i32 zeroext 2147483648, i32 signext -2147483648, i32 signext 1, i32 signext 1, i32 signext 1, i32 signext 1) + ret void +} + +; CHECK-LABEL: name: call_test_ints + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: renamable $x3 = LI8 1 +; 64BIT: renamable $x5 = RLDICR killed renamable $x3, 31, 32 +; 64BIT: $x3 = LI8 1 +; 64BIT: $x4 = LI8 1 +; 64BIT: $x6 = LIS8 32768 +; 64BIT: $x7 = LI8 1 +; 64BIT: $x8 = LI8 1 +; 64BIT: $x9 = LI8 1 +; 64BIT: $x10 = LI8 1 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $x2, implicit-def $r1, implicit-def dead $x3 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define void @call_test_i64() { +entry: + call i64 @test_i64(i64 1, i64 2, i64 3, i64 4) + ret void +} + + +; CHECK-LABEL: name: call_test_i64 + +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: $r3 = LI 0 +; 32BIT: $r4 = LI 1 +; 32BIT: $r5 = LI 0 +; 32BIT: $r6 = LI 2 +; 32BIT: $r7 = LI 0 +; 32BIT: $r8 = LI 3 +; 32BIT: $r9 = LI 0 +; 32BIT: $r10 = LI 4 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $r2, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: $x3 = LI8 1 +; 64BIT: $x4 = LI8 2 +; 64BIT: $x5 = LI8 3 +; 64BIT: $x6 = LI8 4 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define i64 @test_i64(i64 %a, i64 %b, i64 %c, i64 %d) { +entry: + %add = add nsw i64 %a, %b + %add1 = add nsw i64 %add, %c + %add2 = add nsw i64 %add1, %d + ret i64 %add2 +} + +; CHECK-LABEL: name: test_i64 + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r8', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r9', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r10', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 + +define void @call_test_int_ptr() { +entry: + %b = alloca i32, align 4 + store i32 0, i32* %b, align 4 + call void @test_int_ptr(i32* %b) 
+ ret void +} + +; CHECK-LABEL: name: call_test_int_ptr + +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: renamable $r3 = ADDI %stack.0.b, 0 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: renamable $x3 = ADDI8 %stack.0.b, 0 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define void @test_int_ptr(i32* %a) { +entry: + %a.addr = alloca i32*, align 8 + store i32* %a, i32** %a.addr, align 8 + ret void +} + +; CHECK-LABEL: name: test_int_ptr + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3 +; 32BIT: STW killed renamable $r3, 0, %stack.0.a.addr :: (store 4 into %ir.a.addr, align 8) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; 64BIT: STD killed renamable $x3, 0, %stack.0.a.addr :: (store 8 into %ir.a.addr) + + +define i32 @caller(i32 %i) { +entry: + %i.addr = alloca i32, align 4 + %b = alloca i8, align 1 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %cmp = icmp ne i32 %0, 0 + %frombool = zext i1 %cmp to i8 + store i8 %frombool, i8* %b, align 1 + %1 = load i8, i8* %b, align 1 + %tobool = trunc i8 %1 to i1 + %call = call i32 @call_test_bool(i1 zeroext %tobool) + ret i32 %call +} + +declare i32 @call_test_bool(i1 zeroext) + +; CHECK-LABEL: name: caller + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT: liveins: $r3 +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1, implicit-def $r3 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1, implicit-def $x3 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +@f1 = global float 0.000000e+00, align 4 +@d1 = global double 0.000000e+00, align 8 + +define void @call_test_floats() { +entry: + %0 = load float, float* @f1, align 4 + call float @test_floats(float %0, float %0, float %0) + ret void +} + +; CHECK-LABEL: name: call_test_floats{{.*}} + +; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $f2 = COPY renamable $f1 +; 32BIT-NEXT: $f3 = COPY renamable $f1 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 
:: (dereferenceable load 4 from @f1) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $f2 = COPY renamable $f1 +; 64BIT-NEXT: $f3 = COPY renamable $f1 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define float @test_floats(float %f1, float %f2, float %f3) { +entry: + %add = fadd float %f1, %f2 + %add1 = fadd float %add, %f3 + ret float %add1 +} + +; CHECK-LABEL: name: test_floats{{.*}} + +; CHECK: liveins: +; CHECK-NEXT: - { reg: '$f1', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f2', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f3', virtual-reg: '' } +; CHECK: body: | +; CHECK-NEXT: bb.0.entry: +; CHECK-NEXT: liveins: $f1, $f2, $f3 + +define void @call_test_fpr_max() { +entry: + %0 = load double, double* @d1, align 8 + call double @test_fpr_max(double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0) + ret void +} + +; CHECK-LABEL: name: call_test_fpr_max{{.*}} + +; 32BIT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $f2 = COPY renamable $f1 +; 32BIT-NEXT: $f3 = COPY renamable $f1 +; 32BIT-NEXT: $f4 = COPY renamable $f1 +; 32BIT-NEXT: $f5 = COPY renamable $f1 +; 32BIT-NEXT: $f6 = COPY renamable $f1 +; 32BIT-NEXT: $f7 = COPY renamable $f1 +; 32BIT-NEXT: $f8 = COPY renamable $f1 +; 32BIT-NEXT: $f9 = COPY renamable $f1 +; 32BIT-NEXT: $f10 = COPY renamable $f1 +; 32BIT-NEXT: $f11 = COPY renamable $f1 +; 32BIT-NEXT: $f12 = COPY renamable $f1 +; 32BIT-NEXT: $f13 = COPY renamable $f1 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: renamable $x3 = LDtoc @d1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load 8 from @d1) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $f2 = COPY renamable $f1 +; 64BIT-NEXT: $f3 = COPY renamable $f1 +; 64BIT-NEXT: $f4 = COPY renamable $f1 +; 64BIT-NEXT: $f5 = COPY renamable $f1 +; 64BIT-NEXT: $f6 = COPY renamable $f1 +; 64BIT-NEXT: $f7 = COPY renamable $f1 +; 64BIT-NEXT: $f8 = COPY renamable $f1 +; 64BIT-NEXT: $f9 = COPY renamable $f1 +; 64BIT-NEXT: $f10 = COPY renamable $f1 +; 64BIT-NEXT: $f11 = COPY renamable $f1 +; 64BIT-NEXT: $f12 = COPY renamable $f1 +; 64BIT-NEXT: $f13 = COPY renamable $f1 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define double 
@test_fpr_max(double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13) { +entry: + %add = fadd double %d1, %d2 + %add1 = fadd double %add, %d3 + %add2 = fadd double %add1, %d4 + %add3 = fadd double %add2, %d5 + %add4 = fadd double %add3, %d6 + %add5 = fadd double %add4, %d7 + %add6 = fadd double %add5, %d8 + %add7 = fadd double %add6, %d9 + %add8 = fadd double %add7, %d10 + %add9 = fadd double %add8, %d11 + %add10 = fadd double %add9, %d12 + %add11 = fadd double %add10, %d13 + ret double %add11 +} + +; CHECK-LABEL: name: test_fpr_max{{.*}} + +; CHECK: liveins: +; CHECK-NEXT: - { reg: '$f1', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f2', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f3', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f4', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f5', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f6', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f7', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f8', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f9', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f10', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f11', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f12', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f13', virtual-reg: '' } +; CHECK: body: | +; CHECK-NEXT: bb.0.entry: +; CHECK-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 + +define void @call_test_mix() { +entry: + %0 = load float, float* @f1, align 4 + %1 = load double, double* @d1, align 8 + call i32 @test_mix(float %0, i32 1, double %1, i8 signext 97) + ret void +} + +; CHECK-LABEL: name: call_test_mix{{.*}} + +; 32BIT: renamable $r[[REG1:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $r[[REG2:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG1]] :: (dereferenceable load 4 from @f1) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG2]] :: (dereferenceable load 8 from @d1) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $r4 = LI 1 +; 32BIT-NEXT: $r7 = LI 97 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $r4, implicit $f2, implicit killed $r7, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: renamable $x[[REG1:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $x[[REG2:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG1]] :: (dereferenceable load 4 from @f1) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG2]] :: (dereferenceable load 8 from @d1) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $x4 = LI8 1 +; 64BIT-NEXT: $x6 = LI8 97 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x4, implicit $f2, implicit killed $x6, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define i32 @test_mix(float %f, i32 signext %i, double %d, i8 signext %c) { +entry: + %conv = fpext float %f to double + %add = fadd double %conv, %d + %conv1 = fptrunc double %add to float + %conv2 = zext i8 %c to i32 + %add3 = add nsw i32 %i, %conv2 + %conv4 = sitofp i32 %add3 to float + %add5 = fadd float %conv4, %conv1 + %conv6 = fptosi float %add5 to i32 + ret i32 %conv6 +} + +; 
CHECK-LABEL: name: test_mix{{.*}} + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$f1', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$f2', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $f1, $f2, $r4, $r7 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$f1', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$f2', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $f1, $f2, $x4, $x6 + + +define i64 @callee_mixed_ints(i32 %a, i8 signext %b, i32 %c, i16 signext %d, i64 %e) { +entry: + %conv = zext i8 %b to i32 + %add = add nsw i32 %a, %conv + %add1 = add nsw i32 %add, %c + %conv2 = sext i16 %d to i32 + %add3 = add nsw i32 %add1, %conv2 + %conv4 = sext i32 %add3 to i64 + %add5 = add nsw i64 %conv4, %e + ret i64 %add5 + } + +; CHECK-LABEL: name: callee_mixed_ints + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r8', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x7', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7 diff --git a/llvm/test/CodeGen/PowerPC/aix_fpr_param.ll b/llvm/test/CodeGen/PowerPC/aix_fpr_param.ll deleted file mode 100644 index f92096f3ab7d3..0000000000000 --- a/llvm/test/CodeGen/PowerPC/aix_fpr_param.ll +++ /dev/null @@ -1,150 +0,0 @@ -; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=32BIT %s - -; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=64BIT %s - -@f1 = global float 0.000000e+00, align 4 -@d1 = global double 0.000000e+00, align 8 - -define void @call_test_float() { -entry: -; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - %0 = load float, float* @f1, align 4 - call void @test_float(float %0) - ret void -} - -declare void @test_float(float) - -define void @call_test_floats() { -entry: -; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) -; 
32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $f2 = COPY renamable $f1 -; 32BIT: $f3 = COPY renamable $f1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $f2 = COPY renamable $f1 -; 64BIT: $f3 = COPY renamable $f1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - %0 = load float, float* @f1, align 4 - call void @test_floats(float %0, float %0, float %0) - ret void -} - -declare void @test_floats(float, float, float) - -define void @call_test_double() { -entry: -; 32BIT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load 8 from @d1) -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - %0 = load double, double* @d1, align 8 - call void @test_double(double %0) - ret void -} - -declare void @test_double(double) - -define void @call_test_fpr_max() { -entry: -; 32BIT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $f2 = COPY renamable $f1 -; 32BIT: $f3 = COPY renamable $f1 -; 32BIT: $f4 = COPY renamable $f1 -; 32BIT: $f5 = COPY renamable $f1 -; 32BIT: $f6 = COPY renamable $f1 -; 32BIT: $f7 = COPY renamable $f1 -; 32BIT: $f8 = COPY renamable $f1 -; 32BIT: $f9 = COPY renamable $f1 -; 32BIT: $f10 = COPY renamable $f1 -; 32BIT: $f11 = COPY renamable $f1 -; 32BIT: $f12 = COPY renamable $f1 -; 32BIT: $f13 = COPY renamable $f1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load 8 from @d1) -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $f2 = COPY renamable $f1 -; 64BIT: $f3 = COPY renamable $f1 -; 64BIT: $f4 = COPY renamable $f1 -; 64BIT: $f5 = COPY renamable $f1 -; 64BIT: $f6 = COPY renamable 
$f1 -; 64BIT: $f7 = COPY renamable $f1 -; 64BIT: $f8 = COPY renamable $f1 -; 64BIT: $f9 = COPY renamable $f1 -; 64BIT: $f10 = COPY renamable $f1 -; 64BIT: $f11 = COPY renamable $f1 -; 64BIT: $f12 = COPY renamable $f1 -; 64BIT: $f13 = COPY renamable $f1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - %0 = load double, double* @d1, align 8 - call void @test_fpr_max(double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0) - ret void -} - -declare void @test_fpr_max(double, double, double, double, double, double, double, double, double, double, double, double, double) - -define void @call_test_mix() { -entry: -; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT: renamable $r4 = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) -; 32BIT: renamable $f2 = LFD 0, killed renamable $r4 :: (dereferenceable load 8 from @d1) -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r4 = LI 1 -; 32BIT: $r7 = LI 97 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $r4, implicit $f2, implicit killed $r7, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT: renamable $x4 = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) -; 64BIT: renamable $f2 = LFD 0, killed renamable $x4 :: (dereferenceable load 8 from @d1) -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x4 = LI8 1 -; 64BIT: $x6 = LI8 97 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x4, implicit $f2, implicit killed $x6, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - %0 = load float, float* @f1, align 4 - %1 = load double, double* @d1, align 8 - call void @test_mix(float %0, i32 1, double %1, i8 signext 97) - ret void -} - -declare void @test_mix(float, i32, double, i8 signext) diff --git a/llvm/test/CodeGen/PowerPC/aix_gpr_param.ll b/llvm/test/CodeGen/PowerPC/aix_gpr_param.ll deleted file mode 100644 index 42b6f886e687d..0000000000000 --- a/llvm/test/CodeGen/PowerPC/aix_gpr_param.ll +++ /dev/null @@ -1,199 +0,0 @@ -; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=32BIT %s - -; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=64BIT %s - -define void @call_test_char() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 97 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 97 -; 64BIT: BL8_NOP , csr_aix64, 
implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_char(i8 signext 97) - ret void -} - -define void @call_test_chars() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 97 -; 32BIT: $r4 = LI 97 -; 32BIT: $r5 = LI 97 -; 32BIT: $r6 = LI 97 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 97 -; 64BIT: $x4 = LI8 97 -; 64BIT: $x5 = LI8 97 -; 64BIT: $x6 = LI8 97 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_chars(i8 signext 97, i8 signext 97, i8 signext 97, i8 signext 97) - ret void -} - -define void @call_test_chars_mix() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 97 -; 32BIT: $r4 = LI 225 -; 32BIT: $r5 = LI 97 -; 32BIT: $r6 = LI -31 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 97 -; 64BIT: $x4 = LI8 225 -; 64BIT: $x5 = LI8 97 -; 64BIT: $x6 = LI8 -31 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_chars_mix(i8 signext 97, i8 zeroext -31, i8 zeroext 97, i8 signext -31) - ret void -} - -define void @call_test_int() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_int(i32 1) - ret void -} - -define void @call_test_ints() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 1 -; 32BIT: $r4 = LI 1 -; 32BIT: $r5 = LI 1 -; 32BIT: $r6 = LI 1 -; 32BIT: $r7 = LI 1 -; 32BIT: $r8 = LI 1 -; 32BIT: $r9 = LI 1 -; 32BIT: $r10 = LI 1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 1 -; 64BIT: $x4 = LI8 1 -; 
64BIT: $x5 = LI8 1 -; 64BIT: $x6 = LI8 1 -; 64BIT: $x7 = LI8 1 -; 64BIT: $x8 = LI8 1 -; 64BIT: $x9 = LI8 1 -; 64BIT: $x10 = LI8 1 -; 64BIT: BL8_NOP <mcsymbol .test_ints>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_ints(i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) - ret void -} - -define void @call_test_ints_64bit() { -entry: -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: renamable $x3 = LI8 1 -; 64BIT: renamable $x5 = RLDICR killed renamable $x3, 31, 32 -; 64BIT: $x3 = LI8 1 -; 64BIT: $x4 = LI8 1 -; 64BIT: $x6 = LIS8 32768 -; 64BIT: $x7 = LI8 1 -; 64BIT: $x8 = LI8 1 -; 64BIT: $x9 = LI8 1 -; 64BIT: $x10 = LI8 1 -; 64BIT: BL8_NOP <mcsymbol .test_ints_64bit>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_ints_64bit(i32 signext 1, i32 zeroext 1, i32 zeroext 2147483648, i32 signext -2147483648, i32 signext 1, i32 signext 1, i32 signext 1, i32 signext 1) - ret void -} - -define void @call_test_i1() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 1 -; 32BIT: BL_NOP <mcsymbol .test_i1>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 1 -; 64BIT: BL8_NOP <mcsymbol .test_i1>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_i1(i1 1) - ret void -} - -define void @call_test_i64() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 0 -; 32BIT: $r4 = LI 1 -; 32BIT: BL_NOP <mcsymbol .test_i64>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 1 -; 64BIT: BL8_NOP <mcsymbol .test_i64>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_i64(i64 1) - ret void -} - -define void @call_test_int_ptr() { -entry: - %b = alloca i32, align 4 -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: renamable $r3 = ADDI %stack.0.b, 0 -; 32BIT: BL_NOP <mcsymbol .test_int_ptr>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: renamable $x3 = ADDI8 %stack.0.b, 0 -; 64BIT: BL8_NOP <mcsymbol .test_int_ptr>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - store i32 0, i32* %b, align 4 - call void @test_int_ptr(i32* %b) - ret void -} - -declare void 
@test_char(i8 signext) - -declare void @test_chars(i8 signext, i8 signext, i8 signext, i8 signext) - -declare void @test_chars_mix(i8 signext, i8 zeroext, i8 zeroext, i8 signext) - -declare void @test_int(i32) - -declare void @test_ints(i32, i32, i32, i32, i32, i32, i32, i32) - -declare void @test_ints_64bit(i32 signext, i32 zeroext, i32 zeroext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext) - -declare void @test_i1(i1) - -declare void @test_i64(i64) - -declare void @test_int_ptr(i32*) diff --git a/llvm/test/CodeGen/PowerPC/check-cpu.ll b/llvm/test/CodeGen/PowerPC/check-cpu.ll new file mode 100644 index 0000000000000..baa39024ebe8d --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/check-cpu.ll @@ -0,0 +1,11 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future < %s | FileCheck %s + + +; Test that -mcpu=future is recognized as a valid CPU on PowerPC. + +; CHECK-NOT: is not a recognized processor for this target +; CHECK: .text + diff --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll b/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll index b0586b06cd1fe..12887d8922592 100644 --- a/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll +++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll @@ -11,8 +11,7 @@ define void @foo(i32 signext %var1) { ; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: addis r4, r2, res@toc@ha ; CHECK-NEXT: cntlzw r3, r3 -; CHECK-NEXT: srwi r3, r3, 5 -; CHECK-NEXT: slwi r3, r3, 19 +; CHECK-NEXT: rlwinm r3, r3, 14, 0, 12 ; CHECK-NEXT: stw r3, res@toc@l(r4) ; CHECK-NEXT: blr entry: @@ -30,10 +29,10 @@ define void @foo_multiple_use(i32 signext %var1) { ; CHECK-NEXT: addis r4, r2, res2@toc@ha ; CHECK-NEXT: addis r6, r2, res@toc@ha ; CHECK-NEXT: cntlzw r3, r3 -; CHECK-NEXT: srwi r3, r3, 5 -; CHECK-NEXT: slwi r5, r3, 19 -; CHECK-NEXT: stw r3, res2@toc@l(r4) -; CHECK-NEXT: stw r5, res@toc@l(r6) +; CHECK-NEXT: srwi r5, r3, 5 +; CHECK-NEXT: rlwinm r3, r3, 14, 0, 12 +; CHECK-NEXT: stw r5, res2@toc@l(r4) +; CHECK-NEXT: stw r3, res@toc@l(r6) ; CHECK-NEXT: blr entry: %cmp = icmp eq i32 %var1, 1 diff --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir new file mode 100644 index 0000000000000..426aaa7a76313 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir @@ -0,0 +1,140 @@ +# RUN: llc -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -verify-machineinstrs | FileCheck %s + +--- +name: testFoldRLWINM +# CHECK: name: testFoldRLWINM +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3 + %0:g8rc = COPY $x3 + %1:gprc = COPY %0.sub_32:g8rc + %2:gprc = RLWINM %1:gprc, 27, 5, 31 + ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 5, 31 + %3:gprc = RLWINM %2:gprc, 19, 0, 12 + ; CHECK: %3:gprc = RLWINM %1, 14, 0, 12 + BLR8 implicit $lr8, implicit $rm +... +--- +name: testFoldRLWINMSrcFullMask1 +# CHECK: name: testFoldRLWINMSrcFullMask1 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3 + %0:g8rc = COPY $x3 + %1:gprc = COPY %0.sub_32:g8rc + %2:gprc = RLWINM %1:gprc, 27, 0, 31 + ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 0, 31 + %3:gprc = RLWINM %2:gprc, 19, 0, 12 + ; CHECK: %3:gprc = RLWINM %1, 14, 0, 12 + BLR8 implicit $lr8, implicit $rm +... 
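+# The cases in this file exercise one composition rule (a sketch of the
+# intent; the peephole itself may apply extra conditions): feeding
+# y = RLWINM(x, SH1, MB1, ME1) into RLWINM(y, SH2, MB2, ME2) is the same as
+# rotating x left by (SH1 + SH2) mod 32 and masking with mask(MB2, ME2)
+# AND (mask(MB1, ME1) rotated left by SH2), where bit 0 is the MSB.
+# In testFoldRLWINM above, (27 + 19) mod 32 = 14 and the composed mask is
+# still 0..12, giving RLWINM %1, 14, 0, 12. An all-zero composed mask folds
+# to LI 0 (testFoldRLWINMToZero below), and a composed mask that is not a
+# contiguous (possibly wrapped) run of bits cannot be encoded in a single
+# RLWINM, so no fold happens (testFoldRLWINMInvalidMask).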
+--- +name: testFoldRLWINMSrcFullMask2 +# CHECK: name: testFoldRLWINMSrcFullMask2 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3 + %0:g8rc = COPY $x3 + %1:gprc = COPY %0.sub_32:g8rc + %2:gprc = RLWINM %1:gprc, 27, 10, 9 + ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 10, 9 + %3:gprc = RLWINM %2:gprc, 19, 10, 1 + ; CHECK: %3:gprc = RLWINM %1, 14, 10, 1 + BLR8 implicit $lr8, implicit $rm +... +--- +name: testFoldRLWINMSrcWrapped +# CHECK: name: testFoldRLWINMSrcWrapped +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3 + %0:g8rc = COPY $x3 + %1:gprc = COPY %0.sub_32:g8rc + %2:gprc = RLWINM %1:gprc, 27, 30, 10 + ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 30, 10 + %3:gprc = RLWINM %2:gprc, 19, 0, 12 + ; CHECK: %3:gprc = RLWINM %1, 14, 11, 12 + BLR8 implicit $lr8, implicit $rm +... +--- +name: testFoldRLWINMUserWrapped +# CHECK: name: testFoldRLWINMUserWrapped +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3 + %0:g8rc = COPY $x3 + %1:gprc = COPY %0.sub_32:g8rc + %2:gprc = RLWINM %1:gprc, 10, 5, 31 + ; CHECK: %2:gprc = RLWINM %1, 10, 5, 31 + %3:gprc = RLWINM %2:gprc, 10, 30, 5 + ; CHECK: %3:gprc = RLWINM %2, 10, 30, 5 + BLR8 implicit $lr8, implicit $rm +... +--- +name: testFoldRLWINMMultipleUses +# CHECK: name: testFoldRLWINMMultipleUses +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3 + %0:g8rc = COPY $x3 + %1:gprc = COPY %0.sub_32:g8rc + %2:gprc = RLWINM killed %1:gprc, 27, 5, 31 + ; CHECK: %2:gprc = RLWINM %1, 27, 5, 31 + %3:gprc = RLWINM %2:gprc, 19, 0, 12 + ; CHECK: %3:gprc = RLWINM killed %1, 14, 0, 12 + STW %3:gprc, %2:gprc, 100 + ; CHECK: STW %3, %2, 100 + BLR8 implicit $lr8, implicit $rm +... +--- +name: testFoldRLWINMToZero +# CHECK: name: testFoldRLWINMToZero +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3 + %0:g8rc = COPY $x3 + %1:gprc = COPY %0.sub_32:g8rc + %2:gprc = RLWINM %1:gprc, 27, 5, 10 + ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 5, 10 + %3:gprc = RLWINM %2:gprc, 8, 5, 10 + ; CHECK: %3:gprc = LI 0 + BLR8 implicit $lr8, implicit $rm +... +--- +name: testFoldRLWINMoToZero +# CHECK: name: testFoldRLWINMoToZero +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3 + %0:g8rc = COPY $x3 + %1:gprc = COPY %0.sub_32:g8rc + %2:gprc = RLWINM %1:gprc, 27, 5, 10 + ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 5, 10 + %3:gprc = RLWINMo %2:gprc, 8, 5, 10, implicit-def $cr0 + ; CHECK: %3:gprc = ANDIo %2, 0, implicit-def $cr0 + BLR8 implicit $lr8, implicit $rm +... +--- +name: testFoldRLWINMInvalidMask +# CHECK: name: testFoldRLWINMInvalidMask +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3 + %0:g8rc = COPY $x3 + %1:gprc = COPY %0.sub_32:g8rc + %2:gprc = RLWINM %1:gprc, 20, 5, 31 + ; CHECK: %2:gprc = RLWINM %1, 20, 5, 31 + %3:gprc = RLWINM %2:gprc, 19, 10, 20 + ; CHECK: %3:gprc = RLWINM %2, 19, 10, 20 + BLR8 implicit $lr8, implicit $rm +... diff --git a/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll b/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll index e48f43a2d4b32..e3254175dbe96 100644 --- a/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll +++ b/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll @@ -41,5 +41,5 @@ define void @test_store(i32 %0) { ; LARGE: stw [[REG3:[0-9]+]], 0([[REG2]]) ; LARGE: blr -; TODO Update test when TOC-entry emission lands. 
-; CHECK-NOT: .tc +; CHECK: .tc a[TC],a +; CHECK: .tc b[TC],b diff --git a/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll b/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll index 371fa0ec279e3..6d1863bc95371 100644 --- a/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll +++ b/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll @@ -41,5 +41,5 @@ define void @test_store(i32 zeroext %0) { ; LARGE: stw [[REG3:[0-9]+]], 0([[REG2]]) ; LARGE: blr -; TODO Update test when TOC-entry emission lands. -; CHECK-NOT: .tc +; CHECK: .tc a[TC],a +; CHECK: .tc b[TC],b diff --git a/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir b/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir index bb2c29fc906d7..0374d55c0cb4d 100644 --- a/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir +++ b/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir @@ -11,8 +11,7 @@ tracksRegLiveness: true body: | bb.0.entry: ; CHECK-LABEL: name: test0 - ; CHECK: renamable $x4 = LI8 1024 - ; CHECK: $x3 = COPY killed renamable $x4 + ; CHECK: $x3 = LI8 1024 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x4 = LI8 1024 $x3 = COPY renamable killed $x4 @@ -28,8 +27,7 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test1 ; CHECK: bb.0.entry: - ; CHECK: renamable $x5 = LI8 42 - ; CHECK: renamable $x4 = COPY killed renamable $x5 + ; CHECK: renamable $x4 = LI8 42 ; CHECK: B %bb.1 ; CHECK: bb.1: ; CHECK: liveins: $x4 @@ -139,8 +137,8 @@ body: | ; CHECK-LABEL: name: iterative_deletion ; CHECK: liveins: $x5 - ; CHECK: renamable $x6 = ADDI8 killed renamable $x5, 1 - ; CHECK: $x3 = COPY $x6 + ; CHECK: renamable $x4 = ADDI8 killed renamable $x5, 1 + ; CHECK: $x3 = COPY $x4 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x6 = ADDI8 renamable killed $x5, 1 renamable $x4 = COPY renamable killed $x6 @@ -160,8 +158,8 @@ body: | ; CHECK-LABEL: name: Enter ; CHECK: liveins: $x4, $x7 ; CHECK: renamable $x5 = COPY killed renamable $x7 - ; CHECK: renamable $x6 = ADDI8 killed renamable $x4, 1 - ; CHECK: $x3 = ADD8 killed renamable $x5, $x6 + ; CHECK: renamable $x7 = ADDI8 killed renamable $x4, 1 + ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x7 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 renamable $x6 = ADDI8 killed renamable $x4, 1 @@ -181,10 +179,9 @@ body: | ; CHECK-LABEL: name: foo ; CHECK: liveins: $x4, $x7 ; CHECK: renamable $x5 = COPY killed renamable $x7 - ; CHECK: renamable $x6 = ADDI8 renamable $x4, 1 - ; CHECK: renamable $x7 = COPY killed renamable $x6 - ; CHECK: renamable $x8 = ADDI8 killed $x4, 2 - ; CHECK: $x3 = ADD8 killed renamable $x5, $x8 + ; CHECK: renamable $x7 = ADDI8 renamable $x4, 1 + ; CHECK: renamable $x6 = ADDI8 killed $x4, 2 + ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x6 ; CHECK: $x3 = ADD8 $x3, killed renamable $x7 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 @@ -208,10 +205,10 @@ body: | ; CHECK-LABEL: name: bar ; CHECK: liveins: $x4, $x7 ; CHECK: renamable $x5 = COPY killed renamable $x7 - ; CHECK: renamable $x6 = ADDI8 renamable $x4, 1 - ; CHECK: renamable $x8 = COPY $x6 - ; CHECK: renamable $x6 = ADDI8 renamable $x5, 2 - ; CHECK: $x3 = ADD8 killed renamable $x5, $x6 + ; CHECK: renamable $x7 = ADDI8 renamable $x4, 1 + ; CHECK: renamable $x8 = COPY killed renamable $x7 + ; CHECK: renamable $x7 = ADDI8 renamable $x5, 2 + ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x7 ; CHECK: $x3 = ADD8 
$x3, killed renamable $x8 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 @@ -236,10 +233,9 @@ body: | ; CHECK-LABEL: name: bogus ; CHECK: liveins: $x7 ; CHECK: renamable $x5 = COPY renamable $x7 - ; CHECK: renamable $x6 = ADDI8 $x7, 1 - ; CHECK: renamable $x7 = COPY $x6 + ; CHECK: renamable $x4 = ADDI8 $x7, 1 ; CHECK: renamable $x6 = ADDI8 renamable $x5, 2 - ; CHECK: $x3 = ADD8 $x7, killed renamable $x5 + ; CHECK: $x3 = ADD8 killed renamable $x4, killed renamable $x5 ; CHECK: $x3 = ADD8 $x3, killed renamable $x6 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 @@ -263,10 +259,10 @@ body: | liveins: $x7 ; CHECK-LABEL: name: foobar ; CHECK: liveins: $x7 - ; CHECK: renamable $x6 = ADDI8 $x7, 1 - ; CHECK: renamable $x8 = COPY $x6 - ; CHECK: renamable $x6 = ADDI8 $x7, 2 - ; CHECK: $x3 = ADD8 $x6, $x7 + ; CHECK: renamable $x4 = ADDI8 $x7, 1 + ; CHECK: renamable $x8 = COPY killed renamable $x4 + ; CHECK: renamable $x4 = ADDI8 $x7, 2 + ; CHECK: $x3 = ADD8 killed renamable $x4, $x7 ; CHECK: $x3 = ADD8 $x3, killed renamable $x8 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 @@ -280,3 +276,22 @@ body: | BLR8 implicit $lr8, implicit undef $rm, implicit $x3 ... + +--- +name: cross_call +alignment: 4 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x2, $x3, $x20 + ; CHECK-LABEL: name: cross_call + ; CHECK: liveins: $x2, $x3, $x20 + ; CHECK: renamable $x20 = LI8 1024 + ; CHECK: BL8_NOP @foo, csr_svr464_altivec, implicit-def $lr8, implicit $rm, implicit $x3, implicit-def $x3, implicit $x2 + ; CHECK: $x3 = COPY killed renamable $x20 + ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 + renamable $x20 = LI8 1024 + BL8_NOP @foo, csr_svr464_altivec, implicit-def $lr8, implicit $rm, implicit $x3, implicit-def $x3, implicit $x2 + $x3 = COPY renamable killed $x20 + BLR8 implicit $lr8, implicit undef $rm, implicit $x3 +... diff --git a/llvm/test/CodeGen/PowerPC/ppc-passname.ll b/llvm/test/CodeGen/PowerPC/ppc-passname.ll index 005f0a25c5637..98343bdb535c2 100644 --- a/llvm/test/CodeGen/PowerPC/ppc-passname.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-passname.ll @@ -1,13 +1,13 @@ -; Test pass name: ppc-loop-preinc-prep. -; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-before=ppc-loop-preinc-prep -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-BEFORE-LOOP-PREINC-PREP -; STOP-BEFORE-LOOP-PREINC-PREP-NOT: -ppc-loop-preinc-prep -; STOP-BEFORE-LOOP-PREINC-PREP-NOT: "ppc-loop-preinc-prep" pass is not registered. -; STOP-BEFORE-LOOP-PREINC-PREP-NOT: Prepare loop for pre-inc. addressing modes - -; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-after=ppc-loop-preinc-prep -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-AFTER-LOOP-PREINC-PREP -; STOP-AFTER-LOOP-PREINC-PREP: -ppc-loop-preinc-prep -; STOP-AFTER-LOOP-PREINC-PREP-NOT: "ppc-loop-preinc-prep" pass is not registered. -; STOP-AFTER-LOOP-PREINC-PREP: Prepare loop for pre-inc. addressing modes +; Test pass name: ppc-loop-instr-form-prep. +; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-before=ppc-loop-instr-form-prep -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-BEFORE-LOOP-INSTR-FORM-PREP +; STOP-BEFORE-LOOP-INSTR-FORM-PREP-NOT: -ppc-loop-instr-form-prep +; STOP-BEFORE-LOOP-INSTR-FORM-PREP-NOT: "ppc-loop-instr-form-prep" pass is not registered. 
+; STOP-BEFORE-LOOP-INSTR-FORM-PREP-NOT: Prepare loop for ppc preferred instruction forms + +; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-after=ppc-loop-instr-form-prep -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-AFTER-LOOP-INSTR-FORM-PREP +; STOP-AFTER-LOOP-INSTR-FORM-PREP: -ppc-loop-instr-form-prep +; STOP-AFTER-LOOP-INSTR-FORM-PREP-NOT: "ppc-loop-instr-form-prep" pass is not registered. +; STOP-AFTER-LOOP-INSTR-FORM-PREP: Prepare loop for ppc preferred instruction forms ; Test pass name: ppc-toc-reg-deps. diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll new file mode 100644 index 0000000000000..179ddc1980a94 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -0,0 +1,1569 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O3 -mtriple=powerpc64le-linux-gnu < %s | FileCheck --check-prefix=PC64LE %s +; RUN: llc -O3 -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 < %s | FileCheck --check-prefix=PC64LE9 %s +; RUN: llc -O3 -mtriple=powerpc64-linux-gnu < %s | FileCheck --check-prefix=PC64 %s + +define ppc_fp128 @test_fadd_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) nounwind { +; PC64LE-LABEL: test_fadd_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl __gcc_qadd +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fadd_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl __gcc_qadd +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fadd_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl __gcc_qadd +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %add = call ppc_fp128 @llvm.experimental.constrained.fadd.ppcf128( + ppc_fp128 %first, + ppc_fp128 %second, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %add +} + +define ppc_fp128 @test_fsub_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) nounwind { +; PC64LE-LABEL: test_fsub_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl __gcc_qsub +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fsub_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl __gcc_qsub +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fsub_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl __gcc_qsub +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %sub = call ppc_fp128 @llvm.experimental.constrained.fsub.ppcf128( + ppc_fp128 %first, + ppc_fp128 %second, + metadata !"round.dynamic", + metadata 
!"fpexcept.strict") + ret ppc_fp128 %sub +} + +define ppc_fp128 @test_fmul_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) nounwind { +; PC64LE-LABEL: test_fmul_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl __gcc_qmul +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fmul_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl __gcc_qmul +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fmul_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl __gcc_qmul +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %mul = call ppc_fp128 @llvm.experimental.constrained.fmul.ppcf128( + ppc_fp128 %first, + ppc_fp128 %second, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %mul +} + +define ppc_fp128 @test_fdiv_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) nounwind { +; PC64LE-LABEL: test_fdiv_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl __gcc_qdiv +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fdiv_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl __gcc_qdiv +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fdiv_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl __gcc_qdiv +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %div = call ppc_fp128 @llvm.experimental.constrained.fdiv.ppcf128( + ppc_fp128 %first, + ppc_fp128 %second, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %div +} + +define ppc_fp128 @test_frem_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) nounwind { +; PC64LE-LABEL: test_frem_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl fmodl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_frem_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl fmodl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_frem_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl fmodl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %rem = call ppc_fp128 @llvm.experimental.constrained.frem.ppcf128( + ppc_fp128 %first, + ppc_fp128 %second, + metadata !"round.dynamic", + 
metadata !"fpexcept.strict") + ret ppc_fp128 %rem +} + +define ppc_fp128 @test_fma_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second, ppc_fp128 %third) nounwind { +; PC64LE-LABEL: test_fma_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl fmal +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fma_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl fmal +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fma_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl fmal +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %add = call ppc_fp128 @llvm.experimental.constrained.fma.ppcf128( + ppc_fp128 %first, + ppc_fp128 %second, + ppc_fp128 %third, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %add +} + +define ppc_fp128 @test_sqrt_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_sqrt_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl sqrtl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_sqrt_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl sqrtl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_sqrt_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl sqrtl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %sqrt = call ppc_fp128 @llvm.experimental.constrained.sqrt.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %sqrt +} + +define ppc_fp128 @test_pow_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) nounwind { +; PC64LE-LABEL: test_pow_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl powl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_pow_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl powl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_pow_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl powl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %pow = call ppc_fp128 @llvm.experimental.constrained.pow.ppcf128( + ppc_fp128 %first, + ppc_fp128 %second, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret 
ppc_fp128 %pow +} + +define ppc_fp128 @test_powi_ppc_fp128(ppc_fp128 %first, i32 %second) nounwind { +; PC64LE-LABEL: test_powi_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: clrldi 5, 5, 32 +; PC64LE-NEXT: bl __powitf2 +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_powi_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: clrldi 5, 5, 32 +; PC64LE9-NEXT: bl __powitf2 +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_powi_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: clrldi 5, 5, 32 +; PC64-NEXT: bl __powitf2 +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %powi = call ppc_fp128 @llvm.experimental.constrained.powi.ppcf128( + ppc_fp128 %first, + i32 %second, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %powi +} + +define ppc_fp128 @test_sin_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_sin_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl sinl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_sin_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl sinl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_sin_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl sinl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %sin = call ppc_fp128 @llvm.experimental.constrained.sin.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %sin +} + +define ppc_fp128 @test_cos_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_cos_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl cosl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_cos_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl cosl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_cos_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl cosl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %cos = call ppc_fp128 @llvm.experimental.constrained.cos.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + 
ret ppc_fp128 %cos +} + +define ppc_fp128 @test_exp_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_exp_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl expl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_exp_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl expl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_exp_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl expl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %exp = call ppc_fp128 @llvm.experimental.constrained.exp.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %exp +} + +define ppc_fp128 @test_exp2_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_exp2_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl exp2l +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_exp2_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl exp2l +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_exp2_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl exp2l +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %exp2 = call ppc_fp128 @llvm.experimental.constrained.exp2.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %exp2 +} + +define ppc_fp128 @test_log_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_log_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl logl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_log_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl logl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_log_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl logl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %log = call ppc_fp128 @llvm.experimental.constrained.log.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %log +} + +define ppc_fp128 @test_log2_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_log2_ppc_fp128: +; PC64LE: # %bb.0: # %entry 
+; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl log2l +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_log2_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl log2l +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_log2_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl log2l +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %log2 = call ppc_fp128 @llvm.experimental.constrained.log2.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %log2 +} + +define ppc_fp128 @test_log10_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_log10_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl log10l +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_log10_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl log10l +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_log10_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl log10l +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %log10 = call ppc_fp128 @llvm.experimental.constrained.log10.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %log10 +} + +define ppc_fp128 @test_rint_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_rint_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl rintl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_rint_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl rintl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_rint_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl rintl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %rint = call ppc_fp128 @llvm.experimental.constrained.rint.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %rint +} + +define ppc_fp128 @test_nearbyint_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_nearbyint_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl nearbyintl +; 
PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_nearbyint_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl nearbyintl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_nearbyint_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl nearbyintl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %nearbyint = call ppc_fp128 @llvm.experimental.constrained.nearbyint.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %nearbyint +} + +define ppc_fp128 @test_maxnum_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) nounwind { +; PC64LE-LABEL: test_maxnum_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl fmaxl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_maxnum_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl fmaxl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_maxnum_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl fmaxl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %maxnum = call ppc_fp128 @llvm.experimental.constrained.maxnum.ppcf128( + ppc_fp128 %first, + ppc_fp128 %second, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %maxnum +} + +define ppc_fp128 @test_minnum_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) nounwind { +; PC64LE-LABEL: test_minnum_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl fminl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_minnum_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl fminl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_minnum_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl fminl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %minnum = call ppc_fp128 @llvm.experimental.constrained.minnum.ppcf128( + ppc_fp128 %first, + ppc_fp128 %second, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %minnum +} + +define ppc_fp128 @test_ceil_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_ceil_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl ceill 
+; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_ceil_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl ceill +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_ceil_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl ceill +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %ceil = call ppc_fp128 @llvm.experimental.constrained.ceil.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %ceil +} + +define ppc_fp128 @test_floor_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_floor_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl floorl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_floor_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl floorl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_floor_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl floorl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %floor = call ppc_fp128 @llvm.experimental.constrained.floor.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %floor +} + +define ppc_fp128 @test_round_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_round_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl roundl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_round_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl roundl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_round_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl roundl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %round = call ppc_fp128 @llvm.experimental.constrained.round.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %round +} + +define ppc_fp128 @test_trunc_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_trunc_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl truncl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: 
blr +; +; PC64LE9-LABEL: test_trunc_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl truncl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_trunc_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl truncl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %trunc = call ppc_fp128 @llvm.experimental.constrained.trunc.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret ppc_fp128 %trunc +} + +define float @test_fptrunc_ppc_fp128_f32(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_fptrunc_ppc_fp128_f32: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: frsp 1, 1 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fptrunc_ppc_fp128_f32: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: frsp 1, 1 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fptrunc_ppc_fp128_f32: +; PC64: # %bb.0: # %entry +; PC64-NEXT: frsp 1, 1 +; PC64-NEXT: blr +entry: + %fptrunc = call float @llvm.experimental.constrained.fptrunc.ppcf128.f32( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %fptrunc +} + +define double @test_fptrunc_ppc_fp128_f64(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_fptrunc_ppc_fp128_f64: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fptrunc_ppc_fp128_f64: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fptrunc_ppc_fp128_f64: +; PC64: # %bb.0: # %entry +; PC64-NEXT: blr +entry: + %fptrunc = call double @llvm.experimental.constrained.fptrunc.ppcf128.f64( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret double %fptrunc +} + +define ppc_fp128 @test_fpext_ppc_fp128_f32(float %first) nounwind { +; PC64LE-LABEL: test_fpext_ppc_fp128_f32: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: xxlxor 2, 2, 2 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fpext_ppc_fp128_f32: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: xxlxor 2, 2, 2 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fpext_ppc_fp128_f32: +; PC64: # %bb.0: # %entry +; PC64-NEXT: addis 3, 2, .LCPI26_0@toc@ha +; PC64-NEXT: lfs 2, .LCPI26_0@toc@l(3) +; PC64-NEXT: blr +entry: + %fpext = call ppc_fp128 @llvm.experimental.constrained.fpext.f32.ppcf128( + float %first, + metadata !"fpexcept.strict") + ret ppc_fp128 %fpext +} + +define ppc_fp128 @test_fpext_ppc_fp128_f64(double %first) nounwind { +; PC64LE-LABEL: test_fpext_ppc_fp128_f64: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: xxlxor 2, 2, 2 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fpext_ppc_fp128_f64: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: xxlxor 2, 2, 2 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fpext_ppc_fp128_f64: +; PC64: # %bb.0: # %entry +; PC64-NEXT: addis 3, 2, .LCPI27_0@toc@ha +; PC64-NEXT: lfs 2, .LCPI27_0@toc@l(3) +; PC64-NEXT: blr +entry: + %fpext = call ppc_fp128 @llvm.experimental.constrained.fpext.f64.ppcf128( + double %first, + metadata !"fpexcept.strict") + ret ppc_fp128 %fpext +} + +define i64 @test_fptosi_ppc_i64_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_fptosi_ppc_i64_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl __fixtfdi +; PC64LE-NEXT: nop +; 
PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fptosi_ppc_i64_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl __fixtfdi +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fptosi_ppc_i64_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl __fixtfdi +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %fpext = call i64 @llvm.experimental.constrained.fptosi.i64.ppcf128( + ppc_fp128 %first, + metadata !"fpexcept.strict") + ret i64 %fpext +} + +define i32 @test_fptosi_ppc_i32_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_fptosi_ppc_i32_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl __gcc_qtou +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fptosi_ppc_i32_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl __gcc_qtou +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fptosi_ppc_i32_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl __gcc_qtou +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %fpext = call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128( + ppc_fp128 %first, + metadata !"fpexcept.strict") + ret i32 %fpext +} + +define i64 @test_fptoui_ppc_i64_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_fptoui_ppc_i64_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl __fixunstfdi +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fptoui_ppc_i64_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl __fixunstfdi +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fptoui_ppc_i64_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl __fixunstfdi +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %fpext = call i64 @llvm.experimental.constrained.fptoui.i64.ppcf128( + ppc_fp128 %first, + metadata !"fpexcept.strict") + ret i64 %fpext +} + +define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) nounwind { +; PC64LE-LABEL: test_fptoui_ppc_i32_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: bl __fixunstfsi +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; 
PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_fptoui_ppc_i32_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: bl __fixunstfsi +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_fptoui_ppc_i32_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: bl __fixunstfsi +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %fpext = call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128( + ppc_fp128 %first, + metadata !"fpexcept.strict") + ret i32 %fpext +} + +; Test that resultant libcalls retain order even when their non-strict FLOP form could be +; trivially optimized into differing sequences. +define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %result) nounwind { +; PC64LE-LABEL: test_constrained_libcall_multichain: +; PC64LE: # %bb.0: +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 29, -48(1) # 8-byte Folded Spill +; PC64LE-NEXT: std 30, -40(1) # 8-byte Folded Spill +; PC64LE-NEXT: stfd 29, -24(1) # 8-byte Folded Spill +; PC64LE-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; PC64LE-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; PC64LE-NEXT: std 0, 16(1) +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: mr 29, 3 +; PC64LE-NEXT: xxlxor 2, 2, 2 +; PC64LE-NEXT: li 3, 0 +; PC64LE-NEXT: mr 30, 4 +; PC64LE-NEXT: lfsx 31, 0, 29 +; PC64LE-NEXT: xxlxor 4, 4, 4 +; PC64LE-NEXT: std 3, 8(4) +; PC64LE-NEXT: fmr 1, 31 +; PC64LE-NEXT: fmr 3, 31 +; PC64LE-NEXT: stfdx 31, 0, 4 +; PC64LE-NEXT: bl __gcc_qadd +; PC64LE-NEXT: nop +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: fmr 4, 2 +; PC64LE-NEXT: fmr 30, 1 +; PC64LE-NEXT: fmr 29, 2 +; PC64LE-NEXT: stfd 1, 16(30) +; PC64LE-NEXT: stfd 2, 24(30) +; PC64LE-NEXT: bl __gcc_qmul +; PC64LE-NEXT: nop +; PC64LE-NEXT: fmr 1, 31 +; PC64LE-NEXT: xxlxor 2, 2, 2 +; PC64LE-NEXT: li 5, 2 +; PC64LE-NEXT: stfd 30, 32(30) +; PC64LE-NEXT: stfd 29, 40(30) +; PC64LE-NEXT: bl __powitf2 +; PC64LE-NEXT: nop +; PC64LE-NEXT: frsp 0, 1 +; PC64LE-NEXT: stfsx 0, 0, 29 +; PC64LE-NEXT: stfd 2, -8(30) +; PC64LE-NEXT: stfd 1, -16(30) +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; PC64LE-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; PC64LE-NEXT: ld 30, -40(1) # 8-byte Folded Reload +; PC64LE-NEXT: ld 29, -48(1) # 8-byte Folded Reload +; PC64LE-NEXT: lfd 29, -24(1) # 8-byte Folded Reload +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_constrained_libcall_multichain: +; PC64LE9: # %bb.0: +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 29, -48(1) # 8-byte Folded Spill +; PC64LE9-NEXT: std 30, -40(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stfd 29, -24(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stfd 30, -16(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; PC64LE9-NEXT: std 0, 16(1) +; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: lfs 31, 0(3) +; PC64LE9-NEXT: mr 29, 3 +; PC64LE9-NEXT: li 3, 0 +; PC64LE9-NEXT: xxlxor 2, 2, 2 +; PC64LE9-NEXT: xxlxor 4, 4, 4 +; PC64LE9-NEXT: std 3, 8(4) +; PC64LE9-NEXT: fmr 1, 31 +; PC64LE9-NEXT: fmr 3, 31 +; PC64LE9-NEXT: mr 30, 4 +; PC64LE9-NEXT: stfd 31, 0(4) +; PC64LE9-NEXT: bl __gcc_qadd +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: fmr 3, 1 +; PC64LE9-NEXT: fmr 4, 2 +; PC64LE9-NEXT: fmr 30, 2 +; PC64LE9-NEXT: fmr 
29, 1 +; PC64LE9-NEXT: stfd 1, 16(30) +; PC64LE9-NEXT: stfd 2, 24(30) +; PC64LE9-NEXT: bl __gcc_qmul +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: fmr 1, 31 +; PC64LE9-NEXT: xxlxor 2, 2, 2 +; PC64LE9-NEXT: li 5, 2 +; PC64LE9-NEXT: stfd 29, 32(30) +; PC64LE9-NEXT: stfd 30, 40(30) +; PC64LE9-NEXT: bl __powitf2 +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: frsp 0, 1 +; PC64LE9-NEXT: stfs 0, 0(29) +; PC64LE9-NEXT: stfd 2, -8(30) +; PC64LE9-NEXT: stfd 1, -16(30) +; PC64LE9-NEXT: addi 1, 1, 80 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 30, -16(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 29, -24(1) # 8-byte Folded Reload +; PC64LE9-NEXT: ld 30, -40(1) # 8-byte Folded Reload +; PC64LE9-NEXT: ld 29, -48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_constrained_libcall_multichain: +; PC64: # %bb.0: +; PC64-NEXT: mflr 0 +; PC64-NEXT: std 0, 16(1) +; PC64-NEXT: stdu 1, -176(1) +; PC64-NEXT: std 29, 120(1) # 8-byte Folded Spill +; PC64-NEXT: mr 29, 3 +; PC64-NEXT: li 3, 0 +; PC64-NEXT: stfd 31, 168(1) # 8-byte Folded Spill +; PC64-NEXT: stfd 30, 160(1) # 8-byte Folded Spill +; PC64-NEXT: std 30, 128(1) # 8-byte Folded Spill +; PC64-NEXT: stfd 28, 144(1) # 8-byte Folded Spill +; PC64-NEXT: stfd 29, 152(1) # 8-byte Folded Spill +; PC64-NEXT: mr 30, 4 +; PC64-NEXT: lfs 31, 0(29) +; PC64-NEXT: std 3, 8(4) +; PC64-NEXT: addis 3, 2, .LCPI32_0@toc@ha +; PC64-NEXT: lfs 30, .LCPI32_0@toc@l(3) +; PC64-NEXT: fmr 1, 31 +; PC64-NEXT: fmr 3, 31 +; PC64-NEXT: fmr 2, 30 +; PC64-NEXT: fmr 4, 30 +; PC64-NEXT: stfd 31, 0(4) +; PC64-NEXT: bl __gcc_qadd +; PC64-NEXT: nop +; PC64-NEXT: fmr 3, 1 +; PC64-NEXT: fmr 4, 2 +; PC64-NEXT: fmr 29, 1 +; PC64-NEXT: fmr 28, 2 +; PC64-NEXT: stfd 1, 16(30) +; PC64-NEXT: stfd 2, 24(30) +; PC64-NEXT: bl __gcc_qmul +; PC64-NEXT: nop +; PC64-NEXT: fmr 1, 31 +; PC64-NEXT: fmr 2, 30 +; PC64-NEXT: li 5, 2 +; PC64-NEXT: stfd 29, 32(30) +; PC64-NEXT: stfd 28, 40(30) +; PC64-NEXT: bl __powitf2 +; PC64-NEXT: nop +; PC64-NEXT: frsp 0, 1 +; PC64-NEXT: stfs 0, 0(29) +; PC64-NEXT: lfd 31, 168(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 30, 160(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 29, 152(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 28, 144(1) # 8-byte Folded Reload +; PC64-NEXT: ld 29, 120(1) # 8-byte Folded Reload +; PC64-NEXT: stfd 2, -8(30) +; PC64-NEXT: stfd 1, -16(30) +; PC64-NEXT: ld 30, 128(1) # 8-byte Folded Reload +; PC64-NEXT: addi 1, 1, 176 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr + %load = load float, float* %firstptr + %first = call ppc_fp128 @llvm.experimental.constrained.fpext.f32.ppcf128( + float %load, + metadata !"fpexcept.strict") + store ppc_fp128 %first, ppc_fp128* %result + + ; For unconstrained FLOPs, these next two FP instructions would necessarily + ; be executed in series with one another. 
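+ ; (%fmul consumes %fadd, so the __gcc_qadd and __gcc_qmul libcalls they
+ ; legalize to are already data-dependent; and because both calls are marked
+ ; "fpexcept.strict", the %fmul call must still be emitted even though its
+ ; result is never stored below.)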
+ %fadd = call ppc_fp128 @llvm.experimental.constrained.fadd.ppcf128( + ppc_fp128 %first, + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %stridx1 = getelementptr ppc_fp128, ppc_fp128* %result, i32 1 + store ppc_fp128 %fadd, ppc_fp128* %stridx1 + %fmul = call ppc_fp128 @llvm.experimental.constrained.fmul.ppcf128( + ppc_fp128 %fadd, + ppc_fp128 %fadd, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %stridx2 = getelementptr ppc_fp128, ppc_fp128* %stridx1, i32 1 + store ppc_fp128 %fadd, ppc_fp128* %stridx2 + + ; For unconstrained FLOPs, these next two FP instructions could be reordered + ; or even executed in parallel with respect to the previous two instructions. + ; However, strict floating point rules would not allow this. + %powi = call ppc_fp128 @llvm.experimental.constrained.powi.ppcf128( + ppc_fp128 %first, + i32 2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %tinypow = call float @llvm.experimental.constrained.fptrunc.ppcf128.f32( + ppc_fp128 %powi, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + store float %tinypow, float* %firstptr + %stridxn1 = getelementptr ppc_fp128, ppc_fp128* %result, i32 -1 + store ppc_fp128 %powi, ppc_fp128* %stridxn1 + ret void +} + +declare ppc_fp128 @llvm.experimental.constrained.fadd.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.ceil.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.cos.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.fdiv.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.exp.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.exp2.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.floor.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.fma.ppcf128(ppc_fp128, ppc_fp128, ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.fpext.f32.ppcf128(float, metadata) +declare ppc_fp128 @llvm.experimental.constrained.fpext.f64.ppcf128(double, metadata) +declare float @llvm.experimental.constrained.fptrunc.ppcf128.f32(ppc_fp128, metadata, metadata) +declare double @llvm.experimental.constrained.fptrunc.ppcf128.f64(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.log.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.log10.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.log2.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.maxnum.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.minnum.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.fmul.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.nearbyint.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.pow.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.powi.ppcf128(ppc_fp128, i32, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.frem.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.rint.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 
@llvm.experimental.constrained.round.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.sin.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.sqrt.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.fsub.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.trunc.ppcf128(ppc_fp128, metadata, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128, metadata) +declare i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.ppcf128(ppc_fp128, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128, metadata) diff --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll index 883d26b669088..a859121bb505c 100644 --- a/llvm/test/CodeGen/PowerPC/pr36292.ll +++ b/llvm/test/CodeGen/PowerPC/pr36292.ll @@ -15,8 +15,7 @@ define void @test() nounwind comdat { ; CHECK-NEXT: ld 29, 0(3) ; CHECK-NEXT: ld 30, 32(1) ; CHECK-NEXT: cmpld 30, 29 -; CHECK-NEXT: bge- 0, .LBB0_2 -; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: bge 0, .LBB0_2 ; CHECK-NEXT: .LBB0_1: # %bounds.ok ; CHECK-NEXT: # ; CHECK-NEXT: lfsx 2, 0, 3 @@ -26,7 +25,7 @@ define void @test() nounwind comdat { ; CHECK-NEXT: addi 30, 30, 1 ; CHECK-NEXT: stfsx 1, 0, 3 ; CHECK-NEXT: cmpld 30, 29 -; CHECK-NEXT: blt+ 0, .LBB0_1 +; CHECK-NEXT: blt 0, .LBB0_1 ; CHECK-NEXT: .LBB0_2: # %bounds.fail ; CHECK-NEXT: std 30, 32(1) %pos = alloca i64, align 8 diff --git a/llvm/test/CodeGen/PowerPC/pr44183.ll b/llvm/test/CodeGen/PowerPC/pr44183.ll new file mode 100644 index 0000000000000..1a6f932bc6d07 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr44183.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s +%struct.m.2.5.8.11 = type { %struct.l.0.3.6.9, [7 x i8], %struct.a.1.4.7.10 } +%struct.l.0.3.6.9 = type { i8 } +%struct.a.1.4.7.10 = type { [27 x i8], [0 x i32], [4 x i8] } +define void @_ZN1m1nEv(%struct.m.2.5.8.11* %this) local_unnamed_addr nounwind align 2 { +; CHECK-LABEL: _ZN1m1nEv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: ld r4, 8(r30) +; CHECK-NEXT: lwz r5, 36(r30) +; CHECK-NEXT: rldicl r4, r4, 60, 4 +; CHECK-NEXT: rlwinm r3, r4, 31, 0, 0 +; CHECK-NEXT: rlwinm r4, r5, 0, 31, 31 +; CHECK-NEXT: or r4, r4, r3 +; CHECK-NEXT: bl _ZN1llsE1d +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 16(r30) +; CHECK-NEXT: ld r4, 8(r30) +; CHECK-NEXT: rldicl r4, r4, 60, 4 +; CHECK-NEXT: sldi r3, r3, 60 +; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: sldi r3, r3, 31 +; CHECK-NEXT: clrldi r4, r3, 32 +; CHECK-NEXT: bl _ZN1llsE1d +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: blr +entry: + %bc = getelementptr inbounds %struct.m.2.5.8.11, %struct.m.2.5.8.11* %this, i64 0, i32 2 + %0 = bitcast %struct.a.1.4.7.10* %bc to i216* + %bf.load = load i216, i216* %0, align 8 + %bf.lshr = lshr i216 %bf.load, 4 + %shl.i23 = shl i216 %bf.lshr, 31 + %shl.i = trunc i216 %shl.i23 to i32 + %arrayidx = getelementptr inbounds 
%struct.m.2.5.8.11, %struct.m.2.5.8.11* %this, i64 0, i32 2, i32 1, i64 0 + %1 = load i32, i32* %arrayidx, align 4 + %and.i = and i32 %1, 1 + %or.i = or i32 %and.i, %shl.i + tail call void @_ZN1llsE1d(%struct.l.0.3.6.9* undef, i32 %or.i) #1 + %bf.load10 = load i216, i216* %0, align 8 + %bf.lshr11 = lshr i216 %bf.load10, 4 + %shl.i1524 = shl i216 %bf.lshr11, 31 + %shl.i15 = trunc i216 %shl.i1524 to i32 + tail call void @_ZN1llsE1d(%struct.l.0.3.6.9* undef, i32 %shl.i15) #1 + ret void +} +declare void @_ZN1llsE1d(%struct.l.0.3.6.9*, i32) local_unnamed_addr #0 diff --git a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll index 6aaf169dabee4..dd41abd093d62 100644 --- a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll +++ b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll @@ -26,8 +26,7 @@ define dso_local i1 @t(%class.A* %this, i32 %color, i32 %vertex) local_unnamed_a ; CHECK-P9-NEXT: cmplwi r3, 2 ; CHECK-P9-NEXT: bge- cr0, .LBB0_6 ; CHECK-P9-NEXT: # %bb.3: # %land.lhs.true.1 -; CHECK-P9-NEXT: li r5, 0 -; CHECK-P9-NEXT: mr r3, r5 +; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: blr ; CHECK-P9-NEXT: .LBB0_4: # %lor.lhs.false ; CHECK-P9-NEXT: cmplwi cr0, r4, 0 diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll index 8fdcd1eac4505..7804b0a3f0979 100644 --- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll @@ -44,7 +44,6 @@ define void @print_res() nounwind { ; CHECK-NEXT: lbz 5, 0(5) ; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: bdz .LBB0_4 -; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # ; CHECK-NEXT: clrldi 10, 8, 32 ; CHECK-NEXT: cntlzw 9, 6 diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll index dd336065ef6c0..1160b4055674d 100644 --- a/llvm/test/CodeGen/RISCV/alu64.ll +++ b/llvm/test/CodeGen/RISCV/alu64.ll @@ -224,8 +224,7 @@ define i64 @sll(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: srli a4, a0, 1 ; RV32I-NEXT: srl a3, a4, a3 ; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: sll a2, a0, a2 -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: sll a0, a0, a2 ; RV32I-NEXT: ret %1 = shl i64 %a, %b ret i64 %1 @@ -311,8 +310,7 @@ define i64 @srl(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: slli a4, a1, 1 ; RV32I-NEXT: sll a3, a4, a3 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: srl a2, a1, a2 -; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: srl a1, a1, a2 ; RV32I-NEXT: ret %1 = lshr i64 %a, %b ret i64 %1 diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll new file mode 100644 index 0000000000000..acd64c203657a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll @@ -0,0 +1,110 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f \ +; RUN: -target-abi ilp32f < %s | FileCheck %s -check-prefix=RV32IF +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f -mattr=+d \ +; RUN: -target-abi ilp32d < %s | FileCheck %s -check-prefix=RV32IFD +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \ +; RUN: -target-abi lp64d < %s | FileCheck %s -check-prefix=RV64IFD + +; Test fcopysign scenarios where the sign argument is cast to the type of the +; magnitude argument.
Those casts can be folded away by the DAGCombiner. + +declare double @llvm.copysign.f64(double, double) +declare float @llvm.copysign.f32(float, float) + +define double @fold_promote(double %a, float %b) nounwind { +; RV32I-LABEL: fold_promote: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a3, 524288 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fold_promote: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a2, zero, -1 +; RV64I-NEXT: slli a2, a2, 63 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: slli a2, a2, 31 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: fold_promote: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.x.w a2, fa0 +; RV32IF-NEXT: lui a3, 524288 +; RV32IF-NEXT: and a2, a2, a3 +; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: and a1, a1, a3 +; RV32IF-NEXT: or a1, a1, a2 +; RV32IF-NEXT: ret +; +; RV32IFD-LABEL: fold_promote: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: fcvt.d.s ft0, fa1 +; RV32IFD-NEXT: fsgnj.d fa0, fa0, ft0 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fold_promote: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.s ft0, fa1 +; RV64IFD-NEXT: fsgnj.d fa0, fa0, ft0 +; RV64IFD-NEXT: ret + %c = fpext float %b to double + %t = call double @llvm.copysign.f64(double %a, double %c) + ret double %t +} + +define float @fold_demote(float %a, double %b) nounwind { +; RV32I-LABEL: fold_demote: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: and a2, a2, a1 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fold_demote: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: addi a2, zero, -1 +; RV64I-NEXT: slli a2, a2, 63 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: fold_demote: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a1 +; RV32IF-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IF-NEXT: ret +; +; RV32IFD-LABEL: fold_demote: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: fcvt.s.d ft0, fa1 +; RV32IFD-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fold_demote: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.s.d ft0, fa1 +; RV64IFD-NEXT: fsgnj.s fa0, fa0, ft0 +; RV64IFD-NEXT: ret + %c = fptrunc double %b to float + %t = call float @llvm.copysign.f32(float %a, float %c) + ret float %t +} diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll index 54c49f3f3ef66..6e1575d9dc5e7 100644 --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -23,8 +23,7 @@ define i64 @lshr64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: slli a4, a1, 1 ; RV32I-NEXT: sll a3, a4, a3 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: srl a2, a1, a2 -; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: srl a1, a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: lshr64: @@ -114,8 +113,7 @@ define i64 @shl64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: srli a4, a0, 1 ; RV32I-NEXT: srl a3, a4, a3 ; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: sll a2, a0, a2 -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: sll a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: shl64: @@ -191,8 +189,7 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV64I-NEXT: slli a4, a1, 1 ; RV64I-NEXT: sll a3, a4, a3 ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: srl a2, a1, a2 -; 
RV64I-NEXT: mv a1, a2 +; RV64I-NEXT: srl a1, a1, a2 ; RV64I-NEXT: ret %1 = lshr i128 %a, %b ret i128 %1 @@ -298,8 +295,7 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV64I-NEXT: srli a4, a0, 1 ; RV64I-NEXT: srl a3, a4, a3 ; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: sll a2, a0, a2 -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: sll a0, a0, a2 ; RV64I-NEXT: ret %1 = shl i128 %a, %b ret i128 %1 diff --git a/llvm/test/CodeGen/RISCV/tls-models.ll b/llvm/test/CodeGen/RISCV/tls-models.ll index a2015b086f95f..25a2f71beb317 100644 --- a/llvm/test/CodeGen/RISCV/tls-models.ll +++ b/llvm/test/CodeGen/RISCV/tls-models.ll @@ -3,16 +3,17 @@ ; RUN: | FileCheck -check-prefix=RV32-PIC %s ; RUN: llc -mtriple=riscv64 -relocation-model=pic < %s \ ; RUN: | FileCheck -check-prefix=RV64-PIC %s -; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefix=NOPIC %s -; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=NOPIC %s +; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefix=RV32-NOPIC %s +; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=RV64-NOPIC %s ; Check that TLS symbols are lowered correctly based on the specified -; model. +; model. Make sure they're external to avoid them all being optimised to Local +; Exec for the executable. -@unspecified = thread_local global i32 42 -@ld = thread_local(localdynamic) global i32 42 -@ie = thread_local(initialexec) global i32 42 -@le = thread_local(localexec) global i32 42 +@unspecified = external thread_local global i32 +@ld = external thread_local(localdynamic) global i32 +@ie = external thread_local(initialexec) global i32 +@le = external thread_local(localexec) global i32 ; No model specified @@ -44,12 +45,23 @@ define i32* @f1() nounwind { ; RV64-PIC-NEXT: addi sp, sp, 16 ; RV64-PIC-NEXT: ret ; -; NOPIC-LABEL: f1: -; NOPIC: # %bb.0: # %entry -; NOPIC-NEXT: lui a0, %tprel_hi(unspecified) -; NOPIC-NEXT: add a0, a0, tp, %tprel_add(unspecified) -; NOPIC-NEXT: addi a0, a0, %tprel_lo(unspecified) -; NOPIC-NEXT: ret +; RV32-NOPIC-LABEL: f1: +; RV32-NOPIC: # %bb.0: # %entry +; RV32-NOPIC-NEXT: .LBB0_1: # %entry +; RV32-NOPIC-NEXT: # Label of block must be emitted +; RV32-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(unspecified) +; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.LBB0_1)(a0) +; RV32-NOPIC-NEXT: add a0, a0, tp +; RV32-NOPIC-NEXT: ret +; +; RV64-NOPIC-LABEL: f1: +; RV64-NOPIC: # %bb.0: # %entry +; RV64-NOPIC-NEXT: .LBB0_1: # %entry +; RV64-NOPIC-NEXT: # Label of block must be emitted +; RV64-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(unspecified) +; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.LBB0_1)(a0) +; RV64-NOPIC-NEXT: add a0, a0, tp +; RV64-NOPIC-NEXT: ret entry: ret i32* @unspecified } @@ -84,12 +96,23 @@ define i32* @f2() nounwind { ; RV64-PIC-NEXT: addi sp, sp, 16 ; RV64-PIC-NEXT: ret ; -; NOPIC-LABEL: f2: -; NOPIC: # %bb.0: # %entry -; NOPIC-NEXT: lui a0, %tprel_hi(ld) -; NOPIC-NEXT: add a0, a0, tp, %tprel_add(ld) -; NOPIC-NEXT: addi a0, a0, %tprel_lo(ld) -; NOPIC-NEXT: ret +; RV32-NOPIC-LABEL: f2: +; RV32-NOPIC: # %bb.0: # %entry +; RV32-NOPIC-NEXT: .LBB1_1: # %entry +; RV32-NOPIC-NEXT: # Label of block must be emitted +; RV32-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) +; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.LBB1_1)(a0) +; RV32-NOPIC-NEXT: add a0, a0, tp +; RV32-NOPIC-NEXT: ret +; +; RV64-NOPIC-LABEL: f2: +; RV64-NOPIC: # %bb.0: # %entry +; RV64-NOPIC-NEXT: .LBB1_1: # %entry +; RV64-NOPIC-NEXT: # Label of block must be emitted +; RV64-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) +; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.LBB1_1)(a0) +; RV64-NOPIC-NEXT: add a0, a0, tp 
+; RV64-NOPIC-NEXT: ret entry: ret i32* @ld } @@ -116,12 +139,23 @@ define i32* @f3() nounwind { ; RV64-PIC-NEXT: add a0, a0, tp ; RV64-PIC-NEXT: ret ; -; NOPIC-LABEL: f3: -; NOPIC: # %bb.0: # %entry -; NOPIC-NEXT: lui a0, %tprel_hi(ie) -; NOPIC-NEXT: add a0, a0, tp, %tprel_add(ie) -; NOPIC-NEXT: addi a0, a0, %tprel_lo(ie) -; NOPIC-NEXT: ret +; RV32-NOPIC-LABEL: f3: +; RV32-NOPIC: # %bb.0: # %entry +; RV32-NOPIC-NEXT: .LBB2_1: # %entry +; RV32-NOPIC-NEXT: # Label of block must be emitted +; RV32-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie) +; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.LBB2_1)(a0) +; RV32-NOPIC-NEXT: add a0, a0, tp +; RV32-NOPIC-NEXT: ret +; +; RV64-NOPIC-LABEL: f3: +; RV64-NOPIC: # %bb.0: # %entry +; RV64-NOPIC-NEXT: .LBB2_1: # %entry +; RV64-NOPIC-NEXT: # Label of block must be emitted +; RV64-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie) +; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.LBB2_1)(a0) +; RV64-NOPIC-NEXT: add a0, a0, tp +; RV64-NOPIC-NEXT: ret entry: ret i32* @ie } @@ -144,12 +178,19 @@ define i32* @f4() nounwind { ; RV64-PIC-NEXT: addi a0, a0, %tprel_lo(le) ; RV64-PIC-NEXT: ret ; -; NOPIC-LABEL: f4: -; NOPIC: # %bb.0: # %entry -; NOPIC-NEXT: lui a0, %tprel_hi(le) -; NOPIC-NEXT: add a0, a0, tp, %tprel_add(le) -; NOPIC-NEXT: addi a0, a0, %tprel_lo(le) -; NOPIC-NEXT: ret +; RV32-NOPIC-LABEL: f4: +; RV32-NOPIC: # %bb.0: # %entry +; RV32-NOPIC-NEXT: lui a0, %tprel_hi(le) +; RV32-NOPIC-NEXT: add a0, a0, tp, %tprel_add(le) +; RV32-NOPIC-NEXT: addi a0, a0, %tprel_lo(le) +; RV32-NOPIC-NEXT: ret +; +; RV64-NOPIC-LABEL: f4: +; RV64-NOPIC: # %bb.0: # %entry +; RV64-NOPIC-NEXT: lui a0, %tprel_hi(le) +; RV64-NOPIC-NEXT: add a0, a0, tp, %tprel_add(le) +; RV64-NOPIC-NEXT: addi a0, a0, %tprel_lo(le) +; RV64-NOPIC-NEXT: ret entry: ret i32* @le } diff --git a/llvm/test/CodeGen/SPARC/fp128.ll b/llvm/test/CodeGen/SPARC/fp128.ll index 83912e0f211ee..1f5d2db661d93 100644 --- a/llvm/test/CodeGen/SPARC/fp128.ll +++ b/llvm/test/CodeGen/SPARC/fp128.ll @@ -1,8 +1,17 @@ ; RUN: llc < %s -march=sparc -mattr=hard-quad-float | FileCheck %s --check-prefix=CHECK --check-prefix=HARD --check-prefix=BE ; RUN: llc < %s -march=sparcel -mattr=hard-quad-float | FileCheck %s --check-prefix=CHECK --check-prefix=HARD --check-prefix=EL -; RUN: llc < %s -march=sparc -mattr=-hard-quad-float | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT --check-prefix=BE +; RUN: llc < %s -march=sparc -mattr=-hard-quad-float -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT --check-prefix=BE ; RUN: llc < %s -march=sparcel -mattr=-hard-quad-float | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT --check-prefix=EL +; XFAIL: * +; This test currently fails with expensive checks enabled; for more details, see +; https://bugs.llvm.org/show_bug.cgi?id=44091. +; *** Bad machine code: Expected a register operand. *** +; - function: f128_compare +; - basic block: %bb.0 entry (0x63f4028) +; - instruction: CMPrr killed %21:intregs, 0, implicit-def $icc +; - operand 1: 0 +; NB: When this is fixed, the verifier should not be run by default in the CL above. ; CHECK-LABEL: f128_ops: ; CHECK: ldd diff --git a/llvm/test/CodeGen/SystemZ/fp-libcall.ll b/llvm/test/CodeGen/SystemZ/fp-libcall.ll index 75250b811cba5..2df25aaf814c4 100644 --- a/llvm/test/CodeGen/SystemZ/fp-libcall.ll +++ b/llvm/test/CodeGen/SystemZ/fp-libcall.ll @@ -233,6 +233,68 @@ define fp128 @f33(fp128 %x, fp128 %y) { ret fp128 %tmp } +; Verify that "nnan" minnum/maxnum calls are transformed to +; compare+select sequences instead of libcalls.
+define float @f34(float %x, float %y) { +; CHECK-LABEL: f34: +; CHECK: cebr %f0, %f2 +; CHECK: blr %r14 +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %tmp = call nnan float @llvm.minnum.f32(float %x, float %y) + ret float %tmp +} + +define double @f35(double %x, double %y) { +; CHECK-LABEL: f35: +; CHECK: cdbr %f0, %f2 +; CHECK: blr %r14 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %tmp = call nnan double @llvm.minnum.f64(double %x, double %y) + ret double %tmp +} + +define fp128 @f36(fp128 %x, fp128 %y) { +; CHECK-LABEL: f36: +; CHECK: cxbr +; CHECK: jl +; CHECK: lxr +; CHECK: br %r14 + %tmp = call nnan fp128 @llvm.minnum.f128(fp128 %x, fp128 %y) + ret fp128 %tmp +} + +define float @f37(float %x, float %y) { +; CHECK-LABEL: f37: +; CHECK: cebr %f0, %f2 +; CHECK: bhr %r14 +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %tmp = call nnan float @llvm.maxnum.f32(float %x, float %y) + ret float %tmp +} + +define double @f38(double %x, double %y) { +; CHECK-LABEL: f38: +; CHECK: cdbr %f0, %f2 +; CHECK: bhr %r14 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %tmp = call nnan double @llvm.maxnum.f64(double %x, double %y) + ret double %tmp +} + +define fp128 @f39(fp128 %x, fp128 %y) { +; CHECK-LABEL: f39: +; CHECK: cxbr +; CHECK: jh +; CHECK: lxr +; CHECK: br %r14 + %tmp = call nnan fp128 @llvm.maxnum.f128(fp128 %x, fp128 %y) + ret fp128 %tmp +} + declare float @llvm.powi.f32(float, i32) declare double @llvm.powi.f64(double, i32) declare fp128 @llvm.powi.f128(fp128, i32) diff --git a/llvm/test/CodeGen/Thumb/callee_save_reserved.ll b/llvm/test/CodeGen/Thumb/callee_save_reserved.ll deleted file mode 100644 index 0329d7886a2a9..0000000000000 --- a/llvm/test/CodeGen/Thumb/callee_save_reserved.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \ -; RUN: -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s - -; Reserved low registers should not be used to correct reg deficit. 
-define <4 x i32> @four_high_four_return_reserved() { -entry: - ; CHECK-INVALID-NOT: r{{6|8}} - tail call void asm sideeffect "", "~{r8},~{r9}"() - %vecinit = insertelement <4 x i32> undef, i32 1, i32 0 - %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 - %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 - %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 - ret <4 x i32> %vecinit13 -} - diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll index 79c81ca7a449c..257d950c60fb3 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll @@ -4,7 +4,7 @@ ; CHECK: vector.body: ; CHECK: %index = phi i32 ; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <16 x i1> @llvm.arm.vctp8(i32 [[ELEMS]]) +; CHECK: [[VCTP:%[^ ]+]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 [[ELEMS]]) ; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 16 ; CHECK: [[LD0:%[^ ]+]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* {{.*}}, i32 4, <16 x i1> [[VCTP]], <16 x i8> undef) ; CHECK: [[LD1:%[^ ]+]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* {{.*}}, i32 4, <16 x i1> [[VCTP]], <16 x i8> undef) @@ -57,7 +57,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ; CHECK: vector.body: ; CHECK: %index = phi i32 ; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.vctp16(i32 [[ELEMS]]) +; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[ELEMS]]) ; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 8 ; CHECK: [[LD0:%[^ ]+]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* {{.*}}, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) ; CHECK: [[LD1:%[^ ]+]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* {{.*}}, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) @@ -109,7 +109,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ; CHECK-LABEL: mul_v4i32 ; CHECK: vector.body: ; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.vctp32(i32 [[ELEMS]]) +; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]]) ; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4 ; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) ; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) @@ -158,59 +158,11 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -; CHECK-LABEL: copy_v2i64 -; CHECK: vector.body: -; CHECK: %index = phi i32 -; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <2 x i1> @llvm.arm.vctp64(i32 [[ELEMS]]) -; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 2 -; CHECK: [[LD0:%[^ ]+]] = tail call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* {{.*}}, i32 4, <2 x i1> [[VCTP]], <2 x i64> undef) -; CHECK: tail call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> [[LD0]], <2 x i64>* {{.*}}, i32 4, <2 x i1> [[VCTP]]) -define void @copy_v2i64(i64* %a, i64* %b, i32 %N) { -entry: - %cmp8 = icmp eq i32 %N, 0 - %tmp8 = add i32 %N, 1 - %tmp9 = lshr i32 %tmp8, 1 - %tmp10 = shl nuw 
i32 %tmp9, 1 - %tmp11 = add i32 %tmp10, -2 - %tmp12 = lshr i32 %tmp11, 1 - %tmp13 = add nuw nsw i32 %tmp12, 1 - br i1 %cmp8, label %for.cond.cleanup, label %vector.ph - -vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <2 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <2 x i32> %broadcast.splatinsert10, <2 x i32> undef, <2 x i32> zeroinitializer - call void @llvm.set.loop.iterations.i32(i32 %tmp13) - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <2 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> undef, <2 x i32> zeroinitializer - %induction = add <2 x i32> %broadcast.splat, - %tmp1 = icmp ule <2 x i32> %induction, %broadcast.splat11 - %tmp = getelementptr inbounds i64, i64* %a, i32 %index - %tmp2 = bitcast i64* %tmp to <2 x i64>* - %wide.masked.load = tail call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %tmp2, i32 4, <2 x i1> %tmp1, <2 x i64> undef) - %tmp3 = getelementptr inbounds i64, i64* %b, i32 %index - %tmp7 = bitcast i64* %tmp3 to <2 x i64>* - tail call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %wide.masked.load, <2 x i64>* %tmp7, i32 4, <2 x i1> %tmp1) - %index.next = add i32 %index, 2 - %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp14, i32 1) - %tmp16 = icmp ne i32 %tmp15, 0 - br i1 %tmp16, label %vector.body, label %for.cond.cleanup - -for.cond.cleanup: ; preds = %vector.body, %entry - ret void -} - ; CHECK-LABEL: split_vector ; CHECK: vector.body: ; CHECK: %index = phi i32 ; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.vctp32(i32 [[ELEMS]]) +; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]]) ; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4 ; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) ; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) @@ -268,7 +220,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ; One of the loads now uses ult predicate. 
; CHECK-LABEL: mismatch_load_pred ; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.vctp32(i32 [[ELEMS]]) +; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]]) ; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4 ; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) ; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{.*}}, i32 4, <4 x i1> %wrong, <4 x i32> undef) @@ -322,7 +274,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ; CHECK-LABEL: mismatch_store_pred ; CHECK: %index = phi i32 ; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.vctp32(i32 [[ELEMS]]) +; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]]) ; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4 ; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) ; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index 5900dd9ac66a9..0b50b9a1db4e9 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -154,11 +154,11 @@ for.cond.cleanup: ; preds = %middle.block, %entr ; CHECK-NEXT: vldrwt.u32 ; CHECK-NEXT: vldrwt.u32 ; CHECK: mov [[ELEMS_OUT:r[0-9]+]], [[ELEMS]] +; CHECK: sub{{.*}} [[ELEMS]],{{.*}}#4 ; CHECK: vpsttt ; CHECK-NEXT: vcmpt.i32 eq, {{.*}}, zr ; CHECK-NEXT: vldrwt.u32 q{{.*}}, [r3] ; CHECK-NEXT: vldrwt.u32 q{{.*}}, [r2] -; CHECK: sub{{.*}} [[ELEMS]],{{.*}}#4 ; CHECK: le lr, [[LOOP]] ; CHECK: vctp.32 [[ELEMS_OUT]] ; CHECK: vpsel diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll index 02d05ef9c0f61..f7c9236c6e62f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -36,21 +36,14 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB0_8 ; CHECK-NEXT: .LBB0_4: @ %vector.ph -; CHECK-NEXT: adds r6, r3, #3 -; CHECK-NEXT: bic r6, r6, #3 -; CHECK-NEXT: subs r6, #4 -; CHECK-NEXT: add.w lr, r12, r6, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB0_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] -; CHECK-NEXT: vmul.f32 q0, q1, q0 -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r1, #16 -; CHECK-NEXT: adds r2, #16 -; CHECK-NEXT: adds r0, #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 +; CHECK-NEXT: vldrw.u32 q1, [r2], #16 ; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vmul.f32 q0, q1, q0 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB0_5 ; CHECK-NEXT: b .LBB0_11 ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader.new @@ -240,13 +233,11 @@ define arm_aapcs_vfpcc float @fast_float_mac(float* nocapture readonly %b, float ; CHECK-NEXT: .LBB1_2: @ %vector.body ; 
CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q2, [r0] -; CHECK-NEXT: vldrwt.u32 q3, [r1] ; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: adds r0, #16 -; CHECK-NEXT: adds r1, #16 ; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 +; CHECK-NEXT: vldrwt.u32 q3, [r1], #16 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vfma.f32 q0, q3, q2 ; CHECK-NEXT: le lr, .LBB1_2 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll index 38e688bbf6288..23c447284293f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -16,17 +16,19 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture re ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov r12, r2 ; CHECK-NEXT: adds r2, r1, r3 -; CHECK-NEXT: vldrb.u32 q2, [r2] +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrbt.u32 q2, [r2] ; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: sub.w r2, r12, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 -; CHECK-NEXT: letp lr, .LBB0_1 +; CHECK-NEXT: le lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %middle.block ; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -82,19 +84,13 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB1_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.s32 q2, [r1] ; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: adds r1, #8 ; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: vldrh.s32 q2, [r1], #8 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 ; CHECK-NEXT: letp lr, .LBB1_1 @@ -160,17 +156,19 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture r ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov r12, r2 ; CHECK-NEXT: adds r2, r1, r3 -; CHECK-NEXT: vldrb.u32 q2, [r2] +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrbt.u32 q2, [r2] ; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: sub.w r2, r12, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 -; CHECK-NEXT: letp lr, .LBB2_1 +; CHECK-NEXT: le lr, .LBB2_1 ; CHECK-NEXT: @ %bb.2: @ %middle.block ; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -226,19 +224,13 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: 
dlstp.32 lr, r2 ; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.u32 q2, [r1] ; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: adds r1, #8 ; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: vldrh.u32 q2, [r1], #8 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 ; CHECK-NEXT: letp lr, .LBB3_1 @@ -297,19 +289,13 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q2, [r1] ; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: adds r1, #16 ; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: vldrw.u32 q2, [r1], #16 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 ; CHECK-NEXT: letp lr, .LBB4_1 @@ -392,13 +378,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: b .LBB5_9 ; CHECK-NEXT: .LBB5_4: @ %vector.ph -; CHECK-NEXT: add.w r7, r12, #3 -; CHECK-NEXT: movs r6, #1 -; CHECK-NEXT: bic r7, r7, #3 ; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: subs r7, #4 -; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB5_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r5, r0, r4 @@ -406,11 +387,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly ; CHECK-NEXT: adds r5, r1, r4 ; CHECK-NEXT: vldrb.u32 q1, [r5] ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: adds r3, #16 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB5_5 ; CHECK-NEXT: b .LBB5_12 ; CHECK-NEXT: .LBB5_6: @ %for.body.preheader.new @@ -607,23 +587,15 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(i16* nocapture readon ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: add.w lr, r12, #3 -; CHECK-NEXT: movs r4, #1 -; CHECK-NEXT: bic lr, lr, #3 -; CHECK-NEXT: sub.w lr, lr, #4 -; CHECK-NEXT: add.w lr, r4, lr, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.s32 q0, [r0] -; CHECK-NEXT: vldrh.s32 q1, [r1] +; CHECK-NEXT: vldrh.s32 q0, [r0], #8 +; CHECK-NEXT: vldrh.s32 q1, [r1], #8 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: adds r1, #8 -; CHECK-NEXT: adds r3, #16 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB6_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -703,13 +675,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: b .LBB7_9 ; CHECK-NEXT: .LBB7_4: @ %vector.ph -; CHECK-NEXT: add.w r7, r12, #3 -; CHECK-NEXT: movs r6, #1 -; CHECK-NEXT: bic r7, r7, #3 ; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: subs r7, #4 
-; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB7_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r5, r0, r4 @@ -717,11 +684,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl ; CHECK-NEXT: adds r5, r1, r4 ; CHECK-NEXT: vldrb.u32 q1, [r5] ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: adds r3, #16 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB7_5 ; CHECK-NEXT: b .LBB7_12 ; CHECK-NEXT: .LBB7_6: @ %for.body.preheader.new @@ -918,23 +884,15 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(i16* nocapture reado ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: add.w lr, r12, #3 -; CHECK-NEXT: movs r4, #1 -; CHECK-NEXT: bic lr, lr, #3 -; CHECK-NEXT: sub.w lr, lr, #4 -; CHECK-NEXT: add.w lr, r4, lr, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.u32 q0, [r0] -; CHECK-NEXT: vldrh.u32 q1, [r1] +; CHECK-NEXT: vldrh.u32 q0, [r0], #8 +; CHECK-NEXT: vldrh.u32 q1, [r1], #8 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: adds r1, #8 -; CHECK-NEXT: adds r3, #16 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB8_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -1016,22 +974,15 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB9_8 ; CHECK-NEXT: .LBB9_4: @ %vector.ph -; CHECK-NEXT: add.w r4, r12, #3 -; CHECK-NEXT: bic r4, r4, #3 -; CHECK-NEXT: subs r4, #4 -; CHECK-NEXT: add.w lr, lr, r4, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB9_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 +; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: adds r0, #16 -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: adds r1, #16 -; CHECK-NEXT: adds r3, #16 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB9_5 ; CHECK-NEXT: b .LBB9_11 ; CHECK-NEXT: .LBB9_6: @ %for.body.preheader.new @@ -1217,24 +1168,18 @@ define dso_local arm_aapcs_vfpcc void @test_v8i8_to_v8i16(i16* noalias nocapture ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: add.w r12, r3, #7 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #7 -; CHECK-NEXT: sub.w r12, r12, #8 -; CHECK-NEXT: add.w lr, lr, r12, lsr #3 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: dlstp.16 lr, lr +; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB10_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r4, r1, r12 ; CHECK-NEXT: vldrb.u16 q0, [r4] ; CHECK-NEXT: add.w r4, r2, r12 -; CHECK-NEXT: vldrb.u16 q1, [r4] -; CHECK-NEXT: vmul.i16 q0, q1, q0 -; CHECK-NEXT: vstrh.16 q0, [r0] -; CHECK-NEXT: adds r0, #16 ; 
CHECK-NEXT: add.w r12, r12, #8 ; CHECK-NEXT: subs r3, #8 +; CHECK-NEXT: vldrb.u16 q1, [r4] +; CHECK-NEXT: vmul.i16 q0, q1, q0 +; CHECK-NEXT: vstrh.16 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB10_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll index 2f9d301e8086d..f67a59f74fb80 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll @@ -28,7 +28,7 @@ define void @mat_vec_sext_i16(i16** nocapture readonly %A, i16* nocapture readon ; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP2]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.vctp32(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP8]], i32 2, <4 x i1> [[TMP1]], <4 x i16> undef) @@ -140,7 +140,7 @@ define void @mat_vec_i32(i32** nocapture readonly %A, i32* nocapture readonly %B ; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP2]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.vctp32(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP8]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/out-of-range-cbz.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/out-of-range-cbz.mir new file mode 100644 index 0000000000000..c5a38ea13454f --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/out-of-range-cbz.mir @@ -0,0 +1,451 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-cp-islands %s -o - | FileCheck %s +--- | + @d = hidden local_unnamed_addr global i32 0, align 4 + @a = hidden global i32 0, align 4 + @e = hidden local_unnamed_addr global i32 0, align 4 + + define hidden void @f(i64 %g) { + entry: + %conv = trunc i64 %g to i32 + %tobool5 = icmp eq i64 %g, 0 + br i1 %tobool5, label %j.us.us.preheader, label %entry.split + + j.us.us.preheader: ; preds = %entry + %.pre59 = load i32, i32* @d, align 4 + br label %j.us.us + + j.us.us: ; preds = %j.us.us, %if.end.us.us.us, %if.end.us.us.us.1, %if.end.us.us.us.2, %if.end.us.us.us.3, %if.end.us.us.us.4, %if.end.us.us.us.5, %if.end.us.us.us.6, %j.us.us.preheader + %0 = phi i32 [ %.pre59, %j.us.us.preheader ], [ %12, %if.end.us.us.us.6 ], [ %11, %if.end.us.us.us.5 ], [ %10, %if.end.us.us.us.4 ], [ %9, %if.end.us.us.us.3 ], [ %8, %if.end.us.us.us.2 ], [ %7, %if.end.us.us.us.1 ], [ %2, %if.end.us.us.us ], [ %0, %j.us.us ] + %cmp.us.us = icmp slt 
i32 %0, ptrtoint (i32* @a to i32) + %conv1.us.us = zext i1 %cmp.us.us to i32 + %1 = load i32, i32* @e, align 4 + %and.us.us = and i32 %1, %conv1.us.us + store i32 %and.us.us, i32* @e, align 4 + %tobool4.us.us.us = icmp eq i32 %0, 0 + br i1 %tobool4.us.us.us, label %if.end.us.us.us, label %j.us.us + + if.end.us.us.us: ; preds = %j.us.us + tail call void asm sideeffect "", ""() + %2 = load i32, i32* @d, align 4 + %tobool4.us.us.us.1 = icmp eq i32 %2, 0 + br i1 %tobool4.us.us.us.1, label %if.end.us.us.us.1, label %j.us.us + + entry.split: ; preds = %entry + %tobool = icmp eq i32 %conv, 0 + br i1 %tobool, label %j.us27.preheader, label %j.preheader + + j.preheader: ; preds = %entry.split + %.pre = load i32, i32* @e, align 4 + %.pre55 = load i32, i32* @d, align 4 + %cmp = icmp slt i32 %conv, ptrtoint (i32* @a to i32) + %conv1 = zext i1 %cmp to i32 + br label %j + + j.us27.preheader: ; preds = %entry.split + %.pre56 = load i32, i32* @d, align 4 + %.pre57 = load i32, i32* @e, align 4 + %cmp.us29 = icmp slt i32 %.pre56, ptrtoint (i32* @a to i32) + %conv1.us30 = zext i1 %cmp.us29 to i32 + br label %j.us27 + + j.us27: ; preds = %j.us27, %j.us27.preheader + %3 = phi i32 [ %.pre57, %j.us27.preheader ], [ %and.us31, %j.us27 ] + %4 = icmp eq i32 %.pre56, 0 + %and.us31 = and i32 %3, %conv1.us30 + br i1 %4, label %if.end.us38, label %j.us27 + + if.end.us38: ; preds = %j.us27 + store i32 %and.us31, i32* @e, align 4 + tail call void asm sideeffect "", ""() + ret void + + j: ; preds = %j, %j.preheader + %5 = phi i32 [ %.pre, %j.preheader ], [ %and, %j ] + %6 = icmp eq i32 %.pre55, 0 + %and = and i32 %5, %conv1 + br i1 %6, label %if.end, label %j + + if.end: ; preds = %j + store i32 %and, i32* @e, align 4 + tail call void asm sideeffect "", ""() + ret void + + if.end.us.us.us.1: ; preds = %if.end.us.us.us + tail call void asm sideeffect "", ""() + %7 = load i32, i32* @d, align 4 + %tobool4.us.us.us.2 = icmp eq i32 %7, 0 + br i1 %tobool4.us.us.us.2, label %if.end.us.us.us.2, label %j.us.us + + if.end.us.us.us.2: ; preds = %if.end.us.us.us.1 + tail call void asm sideeffect "", ""() + %8 = load i32, i32* @d, align 4 + %tobool4.us.us.us.3 = icmp eq i32 %8, 0 + br i1 %tobool4.us.us.us.3, label %if.end.us.us.us.3, label %j.us.us + + if.end.us.us.us.3: ; preds = %if.end.us.us.us.2 + tail call void asm sideeffect "", ""() + %9 = load i32, i32* @d, align 4 + %tobool4.us.us.us.4 = icmp eq i32 %9, 0 + br i1 %tobool4.us.us.us.4, label %if.end.us.us.us.4, label %j.us.us + + if.end.us.us.us.4: ; preds = %if.end.us.us.us.3 + tail call void asm sideeffect "", ""() + %10 = load i32, i32* @d, align 4 + %tobool4.us.us.us.5 = icmp eq i32 %10, 0 + br i1 %tobool4.us.us.us.5, label %if.end.us.us.us.5, label %j.us.us + + if.end.us.us.us.5: ; preds = %if.end.us.us.us.4 + tail call void asm sideeffect "", ""() + %11 = load i32, i32* @d, align 4 + %tobool4.us.us.us.6 = icmp eq i32 %11, 0 + br i1 %tobool4.us.us.us.6, label %if.end.us.us.us.6, label %j.us.us + + if.end.us.us.us.6: ; preds = %if.end.us.us.us.5 + tail call void asm sideeffect "", ""() + %12 = load i32, i32* @d, align 4 + %tobool4.us.us.us.7 = icmp eq i32 %12, 0 + br i1 %tobool4.us.us.us.7, label %if.end.us.us.us.7, label %j.us.us + + if.end.us.us.us.7: ; preds = %if.end.us.us.us.6 + tail call void asm sideeffect "", ""() + ret void + } + +... 
+--- +name: f +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: f + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.5(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $r0, $r1, $r7, $lr + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: dead renamable $r1, $cpsr = tORR killed renamable $r1, renamable $r0, 14, $noreg + ; CHECK: tBcc %bb.5, 0, killed $cpsr + ; CHECK: bb.1.entry.split: + ; CHECK: successors: %bb.15(0x30000000), %bb.2(0x50000000) + ; CHECK: liveins: $r0 + ; CHECK: tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.15, 0, killed $cpsr + ; CHECK: bb.2.j.preheader: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: liveins: $r0 + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @a, 14, $noreg + ; CHECK: tCMPr killed renamable $r0, killed renamable $r1, 14, $noreg, implicit-def $cpsr + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + ; CHECK: renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @d, 14, $noreg + ; CHECK: renamable $r2 = tLDRi killed renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14, $noreg + ; CHECK: renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @e) + ; CHECK: bb.3.j (align 4): + ; CHECK: successors: %bb.4(0x04000000), %bb.3(0x7c000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK: renamable $r3, dead $cpsr = tAND killed renamable $r3, renamable $r0, 14, $noreg + ; CHECK: tCBZ $r2, %bb.4 + ; CHECK: bb.4.if.end: + ; CHECK: liveins: $r1, $r3 + ; CHECK: tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store 4 into @e) + ; CHECK: INLINEASM &"", 1 + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc + ; CHECK: bb.5.j.us.us.preheader: + ; CHECK: successors: %bb.6(0x80000000) + ; CHECK: $r12 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + ; CHECK: $lr = t2MOVi16 
target-flags(arm-lo16) @a, 14, $noreg + ; CHECK: $r12 = t2MOVTi16 killed $r12, target-flags(arm-hi16) @d, 14, $noreg + ; CHECK: $r2 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: $lr = t2MOVTi16 killed $lr, target-flags(arm-hi16) @a, 14, $noreg + ; CHECK: $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @e, 14, $noreg + ; CHECK: bb.6.j.us.us (align 4): + ; CHECK: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r3, $r12 + ; CHECK: tCMPhir renamable $r3, renamable $lr, 14, $noreg, implicit-def $cpsr + ; CHECK: renamable $r1 = tLDRi renamable $r2, 0, 14, $noreg :: (dereferenceable load 4 from @e) + ; CHECK: renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + ; CHECK: renamable $r0 = t2ANDrr killed renamable $r0, killed renamable $r1, 14, $noreg, $noreg + ; CHECK: tSTRi killed renamable $r0, renamable $r2, 0, 14, $noreg :: (store 4 into @e) + ; CHECK: tCBZ $r3, %bb.7 + ; CHECK: bb.7.if.end.us.us.us: + ; CHECK: successors: %bb.8(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.8 + ; CHECK: bb.8.if.end.us.us.us.1: + ; CHECK: successors: %bb.9(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.9 + ; CHECK: bb.9.if.end.us.us.us.2: + ; CHECK: successors: %bb.10(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.10 + ; CHECK: bb.10.if.end.us.us.us.3: + ; CHECK: successors: %bb.11(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.11 + ; CHECK: bb.11.if.end.us.us.us.4: + ; CHECK: successors: %bb.12(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.12 + ; CHECK: bb.12.if.end.us.us.us.5: + ; CHECK: successors: %bb.13(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.13 + ; CHECK: bb.13.if.end.us.us.us.6: + ; CHECK: successors: %bb.14(0x04000000), %bb.6(0x7c000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.14 + ; CHECK: bb.14.if.end.us.us.us.7: + ; CHECK: INLINEASM &"", 1 + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc + ; CHECK: bb.15.j.us27.preheader: + ; CHECK: successors: %bb.16(0x80000000) + ; CHECK: $r0 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg + ; CHECK: $r0 = t2MOVTi16 killed $r0, target-flags(arm-hi16) @d, 14, $noreg + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @a, 14, $noreg + ; CHECK: renamable $r0 = 
tLDRi killed renamable $r0, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCMPr renamable $r0, killed renamable $r1, 14, $noreg, implicit-def $cpsr + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14, $noreg + ; CHECK: renamable $r2 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + ; CHECK: renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @e) + ; CHECK: bb.16.j.us27 (align 4): + ; CHECK: successors: %bb.17(0x04000000), %bb.16(0x7c000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK: renamable $r3, dead $cpsr = tAND killed renamable $r3, renamable $r2, 14, $noreg + ; CHECK: tCBZ $r0, %bb.17 + ; CHECK: bb.17.if.end.us38: + ; CHECK: liveins: $r1, $r3 + ; CHECK: tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store 4 into @e) + ; CHECK: INLINEASM &"", 1 + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc + bb.0.entry: + successors: %bb.1(0x30000000), %bb.11(0x50000000) + liveins: $r0, $r1, $r7, $lr + + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + dead renamable $r1, $cpsr = tORR killed renamable $r1, renamable $r0, 14, $noreg + t2Bcc %bb.1, 0, killed $cpsr + + bb.11.entry.split: + successors: %bb.15(0x30000000), %bb.12(0x50000000) + liveins: $r0 + + tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.15, 0, killed $cpsr + + bb.12.j.preheader: + successors: %bb.13(0x80000000) + liveins: $r0 + + $r1 = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @a, 14, $noreg + tCMPr killed renamable $r0, killed renamable $r1, 14, $noreg, implicit-def $cpsr + $r1 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @d, 14, $noreg + renamable $r2 = tLDRi killed renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @d) + $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14, $noreg + renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @e) + + bb.13.j (align 4): + successors: %bb.14(0x04000000), %bb.13(0x7c000000) + liveins: $r0, $r1, $r2, $r3 + + renamable $r3, dead $cpsr = tAND killed renamable $r3, renamable $r0, 14, $noreg + tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.13, 1, killed $cpsr + + bb.14.if.end: + liveins: $r1, $r3 + + tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store 4 into @e) + INLINEASM &"", 1 + tPOP_RET 14, $noreg, def $r7, def $pc + + bb.1.j.us.us.preheader: + successors: %bb.2(0x80000000) + + $r12 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + $lr = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg + $r12 = t2MOVTi16 killed $r12, target-flags(arm-hi16) @d, 14, $noreg + $r2 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + $lr = t2MOVTi16 killed $lr, target-flags(arm-hi16) @a, 14, $noreg + $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @e, 14, $noreg + + bb.2.j.us.us (align 4): + successors: %bb.3(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r3, $r12 + + tCMPhir renamable $r3, renamable $lr, 14, $noreg, implicit-def $cpsr + renamable $r1 = tLDRi 
renamable $r2, 0, 14, $noreg :: (dereferenceable load 4 from @e) + renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + renamable $r0 = t2ANDrr killed renamable $r0, killed renamable $r1, 14, $noreg, $noreg + tSTRi killed renamable $r0, renamable $r2, 0, 14, $noreg :: (store 4 into @e) + t2Bcc %bb.2, 1, killed $cpsr + + bb.3.if.end.us.us.us: + successors: %bb.4(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.4.if.end.us.us.us.1: + successors: %bb.5(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.5.if.end.us.us.us.2: + successors: %bb.6(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.6.if.end.us.us.us.3: + successors: %bb.7(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.7.if.end.us.us.us.4: + successors: %bb.8(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.8.if.end.us.us.us.5: + successors: %bb.9(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.9.if.end.us.us.us.6: + successors: %bb.10(0x04000000), %bb.2(0x7c000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.10.if.end.us.us.us.7: + INLINEASM &"", 1 + tPOP_RET 14, $noreg, def $r7, def $pc + + bb.15.j.us27.preheader: + successors: %bb.16(0x80000000) + + $r0 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + $r1 = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg + $r0 = t2MOVTi16 killed $r0, target-flags(arm-hi16) @d, 14, $noreg + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @a, 14, $noreg + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPr renamable $r0, killed renamable $r1, 14, $noreg, implicit-def $cpsr + $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14, $noreg + renamable $r2 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @e) + + bb.16.j.us27 (align 4): + successors: %bb.17(0x04000000), %bb.16(0x7c000000) + liveins: $r0, $r1, $r2, $r3 + + renamable $r3, dead $cpsr = tAND killed renamable $r3, 
renamable $r2, 14, $noreg + tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.16, 1, killed $cpsr + + bb.17.if.end.us38: + liveins: $r1, $r3 + + tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store 4 into @e) + INLINEASM &"", 1 + tPOP_RET 14, $noreg, def $r7, def $pc + +... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll index 70e272ffc0dce..330c6db24a74c 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll @@ -1,7 +1,7 @@ ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s ; CHECK-LABEL: expand_v8i16_v8i32 -; CHECK-NOT: call i32 @llvm.arm.vctp +; CHECK-NOT: call i32 @llvm.arm.mve.vctp define void @expand_v8i16_v8i32(i16* noalias nocapture readonly %a, i16* noalias nocapture readonly %b, i32* noalias nocapture %c, i32 %N) { entry: %cmp8 = icmp eq i32 %N, 0 @@ -50,7 +50,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ; CHECK-LABEL: expand_v8i16_v4i32 ; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[ELEMS_REM:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.vctp16(i32 [[ELEMS]]) +; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[ELEMS]]) ; CHECK: [[ELEMS_REM]] = sub i32 [[ELEMS]], 8 ; CHECK: tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* {{.*}}, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) ; CHECK: %store.pred = icmp ule <4 x i32> %induction.store @@ -117,7 +117,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry } ; CHECK-LABEL: expand_v4i32_v4i64 -; CHECK-NOT: call i32 @llvm.arm.vctp +; CHECK-NOT: call i32 @llvm.arm.mve.vctp define void @expand_v4i32_v4i64(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i64* noalias nocapture %c, i32 %N) { entry: %cmp8 = icmp eq i32 %N, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll index 7cdd28fd0f3cf..c7ed9ce674dd4 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll @@ -5,7 +5,7 @@ ; CHECK: phi <8 x i16> [ zeroinitializer, %entry ] ; CHECK: phi i32 ; CHECK: [[PHI:%[^ ]+]] = phi i32 [ %N, %entry ], [ [[ELEMS:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.vctp16(i32 [[PHI]]) +; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[PHI]]) ; CHECK: [[ELEMS]] = sub i32 [[PHI]], 8 ; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) ; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp6, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) @@ -63,7 +63,7 @@ middle.block: ; preds = %vector.body ; CHECK: phi <8 x i16> [ zeroinitializer, %entry ] ; CHECK: phi i32 ; CHECK: [[PHI:%[^ ]+]] = phi i32 [ %N, %entry ], [ [[ELEMS:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.vctp16(i32 [[PHI]]) +; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[PHI]]) ; CHECK: [[ELEMS]] = sub i32 [[PHI]], 8 ; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef) define i16 @reduction_i32_with_scalar(i16* nocapture readonly %A, i16 %B, i32 %N) local_unnamed_addr { diff --git 
a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-def.mir new file mode 100644 index 0000000000000..2ccb8da48d841 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-def.mir @@ -0,0 +1,153 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops -verify-machineinstrs %s -o - | FileCheck %s +# Check that subs isn't used during the revert because there's a def after LoopDec. + +--- | + define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { + entry: + %scevgep = getelementptr i32, i32* %q, i32 -1 + %scevgep3 = getelementptr i32, i32* %p, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %n) + %limit = lshr i32 %n, 1 + br label %while.body + + while.body: ; preds = %while.body, %entry + %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ] + %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ] + %tmp = phi i32 [ %n, %entry ], [ %tmp2, %while.body ] + %scevgep7 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep4 = getelementptr i32, i32* %lsr.iv4, i32 1 + %tmp1 = load i32, i32* %scevgep7, align 4 + %tmp2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp, i32 1) + %half = lshr i32 %tmp1, 1 + %cmp = icmp ult i32 %tmp, %limit + %res = select i1 %cmp, i32 %tmp1, i32 %half + store i32 %res, i32* %scevgep4, align 4 + %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 + %tmp3 = icmp ne i32 %tmp2, 0 + br i1 %tmp3, label %while.body, label %while.end + + while.end: ; preds = %while.body + ret i32 0 + } + + ; Function Attrs: noduplicate nounwind + declare void @llvm.set.loop.iterations.i32(i32) #0 + + ; Function Attrs: noduplicate nounwind + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { noduplicate nounwind } + attributes #1 = { nounwind } + +... 
+--- +name: do_copy +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: do_copy + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r7, $lr + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: $lr = tMOVr killed $r0, 14, $noreg + ; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg + ; CHECK: renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg + ; CHECK: renamable $r2 = t2LSRri renamable $lr, 1, 14, $noreg, $noreg + ; CHECK: bb.1.while.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2 + ; CHECK: renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + ; CHECK: tCMPhir renamable $lr, renamable $r2, 14, $noreg, implicit-def $cpsr + ; CHECK: $lr = t2SUBri killed renamable $lr, 1, 14, $noreg, $noreg + ; CHECK: t2IT 2, 8, implicit-def $itstate + ; CHECK: renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2, killed $cpsr, implicit renamable $r3, implicit killed $itstate + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + ; CHECK: t2CMPri renamable $lr, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.1, 4, killed $cpsr + ; CHECK: tB %bb.2, 14, $noreg + ; CHECK: bb.2.while.end: + ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14, $noreg + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $lr = tMOVr killed $r0, 14, $noreg + renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg + renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg + renamable $r2 = t2LSRri renamable $lr, 1, 14, $noreg, $noreg + t2DoLoopStart 
renamable $lr + + bb.1.while.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $r0, $r1, $r2 + + renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + tCMPhir renamable $lr, renamable $r2, 14, $noreg, implicit-def $cpsr + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2IT 2, 8, implicit-def $itstate + renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2, killed $cpsr, implicit renamable $r3, implicit killed $itstate + early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + t2CMPri renamable $lr, 0, 14, $noreg, implicit-def $cpsr + tBcc %bb.1, 4, killed $cpsr + tB %bb.2, 14, $noreg + + bb.2.while.end: + $r0, dead $cpsr = tMOVi8 0, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir new file mode 100644 index 0000000000000..c052e22d217d6 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir @@ -0,0 +1,152 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops -verify-machineinstrs %s -o - | FileCheck %s +# Check that subs isn't used during the revert because there's a cpsr use after it. + +--- | + define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { + entry: + %scevgep = getelementptr i32, i32* %q, i32 -1 + %scevgep3 = getelementptr i32, i32* %p, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %n) + %limit = lshr i32 %n, 1 + br label %while.body + + while.body: ; preds = %while.body, %entry + %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ] + %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ] + %tmp = phi i32 [ %n, %entry ], [ %tmp2, %while.body ] + %scevgep7 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep4 = getelementptr i32, i32* %lsr.iv4, i32 1 + %tmp1 = load i32, i32* %scevgep7, align 4 + %tmp2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp, i32 1) + %half = lshr i32 %tmp1, 1 + %cmp = icmp ult i32 %tmp, %limit + %res = select i1 %cmp, i32 %tmp1, i32 %half + store i32 %res, i32* %scevgep4, align 4 + %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 + %tmp3 = icmp ne i32 %tmp2, 0 + br i1 %tmp3, label %while.body, label %while.end + + while.end: ; preds = %while.body + ret i32 0 + } + + ; Function Attrs: noduplicate nounwind + declare void @llvm.set.loop.iterations.i32(i32) #0 + + ; Function Attrs: noduplicate nounwind + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { noduplicate nounwind } + attributes #1 = { nounwind } + +... 
+--- +name: do_copy +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: do_copy + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r7, $lr + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg + ; CHECK: renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg + ; CHECK: renamable $r2 = t2LSRri renamable $r0, 1, 14, $noreg, $noreg + ; CHECK: $lr = tMOVr killed $r0, 14, $noreg + ; CHECK: bb.1.while.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2 + ; CHECK: renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + ; CHECK: tCMPhir renamable $lr, renamable $r2, 14, $noreg, implicit-def $cpsr + ; CHECK: $lr = t2SUBri killed renamable $lr, 1, 14, $noreg, $noreg + ; CHECK: t2IT 2, 8, implicit-def $itstate + ; CHECK: renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2, killed $cpsr, implicit renamable $r3, implicit killed $itstate + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + ; CHECK: t2CMPri $lr, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.1, 1, $cpsr + ; CHECK: tB %bb.2, 14, $noreg + ; CHECK: bb.2.while.end: + ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14, $noreg + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg + renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg + t2DoLoopStart renamable $r0 + renamable $r2 = t2LSRri renamable $r0, 1, 14, $noreg, $noreg + $lr = tMOVr killed $r0, 14, $noreg + + 
bb.1.while.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $r0, $r1, $r2 + + renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + tCMPhir renamable $lr, renamable $r2, 14, $noreg, implicit-def $cpsr + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2IT 2, 8, implicit-def $itstate + renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2, killed $cpsr, implicit renamable $r3, implicit killed $itstate + early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14, $noreg + + bb.2.while.end: + $r0, dead $cpsr = tMOVi8 0, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll index 02bf12ce62004..04f408d78acb8 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll @@ -9,28 +9,21 @@ define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* no ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r3, r2, #3 -; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: vmov.i32 q1, #0x0 +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q2, [r1] +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vldrw.u32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q2, [r1], #16 ; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: vmul.i32 q0, q2, q0 -; CHECK-NEXT: adds r0, #16 -; CHECK-NEXT: adds r1, #16 +; CHECK-NEXT: vmul.i32 q1, q2, q1 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %middle.block ; CHECK-NEXT: vctp.32 r3 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} entry: @@ -82,19 +75,13 @@ define dso_local i32 @mul_reduce_add_const(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r1, r2, #3 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: bic r1, r1, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: subs r1, #4 -; CHECK-NEXT: add.w lr, r3, r1, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB1_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #16 +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: letp lr, .LBB1_1 @@ -148,19 +135,13 @@ define dso_local i32 @add_reduce_add_const(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r1, r2, #3 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: bic r1, r1, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: subs r1, #4 -; CHECK-NEXT: add.w lr, r3, r1, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr 
+; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #16 +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: letp lr, .LBB2_1 @@ -213,20 +194,13 @@ define dso_local void @vector_mul_const(i32* noalias nocapture %a, i32* noalias ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} -; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #3 -; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: add.w lr, lr, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vmul.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r1, #16 -; CHECK-NEXT: adds r0, #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vmul.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -272,20 +246,13 @@ define dso_local void @vector_add_const(i32* noalias nocapture %a, i32* noalias ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} -; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #3 -; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: add.w lr, lr, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r1, #16 -; CHECK-NEXT: adds r0, #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB4_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -331,13 +298,8 @@ define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i8(i8* noalias nocaptur ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: add.w r12, r3, #15 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #15 -; CHECK-NEXT: sub.w r12, r12, #16 -; CHECK-NEXT: add.w lr, lr, r12, lsr #4 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: dlstp.8 lr, lr +; CHECK-NEXT: dlstp.8 lr, r3 ; CHECK-NEXT: .LBB5_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r4, r1, r12 @@ -396,22 +358,14 @@ define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i16(i16* noalias nocapt ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} -; CHECK-NEXT: add.w r12, r3, #7 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #7 -; CHECK-NEXT: sub.w r12, r12, #8 -; CHECK-NEXT: add.w lr, lr, r12, lsr #3 -; CHECK-NEXT: dlstp.16 lr, lr +; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vldrh.u16 q1, [r2] -; CHECK-NEXT: vmul.i16 q0, q1, q0 -; CHECK-NEXT: vstrh.16 q0, [r0] -; CHECK-NEXT: adds r1, #16 -; CHECK-NEXT: adds r2, #16 -; CHECK-NEXT: adds r0, #16 +; CHECK-NEXT: vldrh.u16 q0, [r1], #16 +; CHECK-NEXT: vldrh.u16 q1, [r2], #16 ; CHECK-NEXT: subs r3, #8 +; CHECK-NEXT: vmul.i16 q0, q1, q0 
+; CHECK-NEXT: vstrh.16 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB6_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll index dbf40f60cbd9a..38dc5ce54bcbd 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll @@ -6,13 +6,13 @@ ; CHECK: vector.body: ; CHECK-NOT: phi i32 [ 0, %vector.ph ] ; CHECK: [[ELTS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[SUB:%[^ ]+]], %vector.body ] -; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.vctp32(i32 [[ELTS]]) +; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELTS]]) ; CHECK: [[SUB]] = sub i32 [[ELTS]], 4 ; CHECK: call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{.*}}, i32 4, <4 x i1> [[VCTP]] ; CHECK: call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{.*}}, i32 4, <4 x i1> [[VCTP]], ; CHECK: middle.block: -; CHECK: [[VCTP_CLONE:%[^ ]+]] = call <4 x i1> @llvm.arm.vctp32(i32 [[ELTS]]) +; CHECK: [[VCTP_CLONE:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELTS]]) ; CHECK: [[VPSEL:%[^ ]+]] = select <4 x i1> [[VCTP_CLONE]], ; CHECK: call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[VPSEL]]) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir index 69f23f6050131..33389f4c2941c 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir @@ -195,12 +195,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 - ; CHECK: renamable $r12 = t2ADDri renamable $r3, 15, 14, $noreg, $noreg - ; CHECK: renamable $lr = t2MOVi 1, 14, $noreg, $noreg - ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 15, 14, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 16, 14, $noreg, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 35, 14, $noreg, $noreg - ; CHECK: $lr = MVE_WLSTP_8 renamable $lr, %bb.1 + ; CHECK: $lr = MVE_WLSTP_8 renamable $r3, %bb.1 ; CHECK: tB %bb.3, 14, $noreg ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) @@ -216,7 +211,7 @@ body: | ; CHECK: renamable $r4 = t2ADDrr renamable $r0, renamable $r12, 14, $noreg, $noreg ; CHECK: renamable $r12 = t2ADDri killed renamable $r12, 16, 14, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14, $noreg - ; CHECK: renamable $q0 = MVE_VMULt1i8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $q0 = MVE_VMULi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 0, killed $noreg :: (store 16 into %ir.scevgep1, align 1) ; CHECK: $lr = MVE_LETP renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: @@ -257,7 +252,7 @@ body: | renamable $r4 = t2ADDrr renamable $r0, renamable $r12, 14, $noreg, $noreg renamable $r12 = t2ADDri killed renamable $r12, 16, 14, $noreg, $noreg renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14, $noreg - renamable $q0 = MVE_VMULt1i8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + renamable $q0 = MVE_VMULi8 killed renamable $q1, killed renamable $q0, 
0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 1, killed renamable $vpr :: (store 16 into %ir.scevgep1, align 1) renamable $lr = t2LoopDec killed renamable $lr, 1 @@ -323,19 +318,14 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 - ; CHECK: renamable $r12 = t2ADDri renamable $r3, 7, 14, $noreg, $noreg - ; CHECK: renamable $lr = t2MOVi 1, 14, $noreg, $noreg - ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 7, 14, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 8, 14, $noreg, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 27, 14, $noreg, $noreg - ; CHECK: $lr = MVE_WLSTP_16 renamable $lr, %bb.1 + ; CHECK: $lr = MVE_WLSTP_16 renamable $r3, %bb.1 ; CHECK: tB %bb.2, 14, $noreg ; CHECK: bb.1.vector.body: ; CHECK: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $q0 = MVE_VLDRHU16 renamable $r1, 0, 0, $noreg :: (load 16 from %ir.lsr.iv57, align 2) ; CHECK: renamable $q1 = MVE_VLDRHU16 renamable $r2, 0, 0, $noreg :: (load 16 from %ir.lsr.iv24, align 2) - ; CHECK: renamable $q0 = MVE_VMULt1i16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $q0 = MVE_VMULi16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VSTRHU16 killed renamable $q0, renamable $r0, 0, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 2) ; CHECK: renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg ; CHECK: renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 16, 14, $noreg @@ -368,7 +358,7 @@ body: | MVE_VPST 4, implicit $vpr renamable $q0 = MVE_VLDRHU16 renamable $r1, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv57, align 2) renamable $q1 = MVE_VLDRHU16 renamable $r2, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 2) - renamable $q0 = MVE_VMULt1i16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + renamable $q0 = MVE_VMULi16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr MVE_VSTRHU16 killed renamable $q0, renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 2) renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg @@ -437,13 +427,8 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 - ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg - ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg - ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg ; CHECK: renamable $r12 = t2MOVi 0, 14, $noreg, $noreg - ; CHECK: $lr = MVE_WLSTP_32 renamable $lr, %bb.1 + ; CHECK: $lr = MVE_WLSTP_32 $r2, %bb.1 ; CHECK: tB %bb.4, 14, $noreg ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) @@ -456,7 +441,7 @@ body: | ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, $noreg :: (load 16 from %ir.lsr.iv24, align 4) ; CHECK: renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 0, killed $noreg :: (load 16 from 
%ir.lsr.iv1, align 4) ; CHECK: $r3 = tMOVr $r2, 14, $noreg - ; CHECK: renamable $q1 = nsw MVE_VMULt1i32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14, $noreg ; CHECK: renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed $r2, 4, 14, $noreg @@ -505,7 +490,7 @@ body: | renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) $r3 = tMOVr $r2, 14, $noreg - renamable $q1 = nsw MVE_VMULt1i32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14, $noreg renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg renamable $r2, dead $cpsr = tSUBi8 killed $r2, 4, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/ifcvt-neon-deprecated.mir b/llvm/test/CodeGen/Thumb2/ifcvt-neon-deprecated.mir index 58ddfcc2a683e..1f5edb0c78b91 100644 --- a/llvm/test/CodeGen/Thumb2/ifcvt-neon-deprecated.mir +++ b/llvm/test/CodeGen/Thumb2/ifcvt-neon-deprecated.mir @@ -1,54 +1,89 @@ -# RUN: llc -mtriple=thumbv7 -start-before=if-converter -o - %s | FileCheck %s +# RUN: llc -mtriple=thumbv7 -start-before=if-converter %s -o - | FileCheck %s + +--- | + ; ModuleID = 'vdup-test.ll' + source_filename = "vdup-test.ll" + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv7" + + define arm_aapcs_vfpcc <2 x i32> @NeonVdupMul(i32 %scalar, i32 %N, <2 x i32> %vector) { + entry: + %cmp = icmp ne i32 %N, 0 + %broadcast = insertelement <2 x i32> undef, i32 %scalar, i32 0 + %dup = shufflevector <2 x i32> %broadcast, <2 x i32> undef, <2 x i32> zeroinitializer + %mul = mul <2 x i32> %dup, %vector + br i1 %cmp, label %select.end, label %select.false + + select.false: ; preds = %entry + br label %select.end + + select.end: ; preds = %entry, %select.false + %res = phi <2 x i32> [ %mul, %entry ], [ %vector, %select.false ] + ret <2 x i32> %res + } + +... 
--- name: NeonVdupMul +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$d0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} body: | - bb.0: - successors: %bb.2, %bb.1 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.2(0x30000000) liveins: $d0, $r0, $r1 - - t2CMPri killed $r1, 0, 14, $noreg, implicit-def $cpsr + + t2CMPri killed renamable $r1, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.2, 0, killed $cpsr - + bb.1: + successors: %bb.2(0x80000000) liveins: $d0, $r0 - - $d16 = VDUP32d killed $r0, 14, $noreg + + renamable $d16 = VDUP32d killed renamable $r0, 14, $noreg ; Verify that the neon instructions haven't been conditionalized: ; CHECK-LABEL: NeonVdupMul ; CHECK: vdup.32 ; CHECK: vmul.i32 - $d0 = VMULv2i32 killed $d16, killed $d0, 14, $noreg - - bb.2: + renamable $d0 = VMULv2i32 killed renamable $d16, killed renamable $d0, 14, $noreg + + bb.2.select.end: liveins: $d0 - - tBX_RET 14, $noreg, implicit $d0 - -... ---- -name: NeonVmovVfpLdr -body: | - bb.0.entry: - successors: %bb.1, %bb.2 - liveins: $r0, $r1 - - t2CMPri killed $r1, 0, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.2, 1, killed $cpsr - - bb.1: - $d0 = VMOVv2i32 0, 14, $noreg - tBX_RET 14, $noreg, implicit $d0 - - bb.2: - liveins: $r0 - - $d0 = VLDRD killed $r0, 0, 14, $noreg - ; Verify that the neon instruction VMOVv2i32 hasn't been conditionalized, - ; but the VLDR instruction that is available both in the VFP and Advanced - ; SIMD extensions has. - ; CHECK-LABEL: NeonVmovVfpLdr - ; CHECK-DAG: vmov.i32 d0, #0x0 - ; CHECK-DAG: vldr{{ne|eq}} d0, [r0] + tBX_RET 14, $noreg, implicit $d0 ... 
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll new file mode 100644 index 0000000000000..f5b541203f6a3 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll @@ -0,0 +1,219 @@ +; RUN: opt -instcombine %s | llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - | FileCheck %s + +declare <16 x i1> @llvm.arm.mve.vctp8(i32) +declare <8 x i1> @llvm.arm.mve.vctp16(i32) +declare <4 x i1> @llvm.arm.mve.vctp32(i32) +declare <4 x i1> @llvm.arm.mve.vctp64(i32) + +declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>) +declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>) +declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>) + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) + +define arm_aapcs_vfpcc zeroext i16 @test_vctp8q(i32 %a) { +; CHECK-LABEL: test_vctp8q: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vctp.8 r0 +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: bx lr +entry: + %0 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %a) + %1 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %0) + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +define arm_aapcs_vfpcc zeroext i16 @test_vctp8q_m(i32 %a, i16 zeroext %p) { +; CHECK-LABEL: test_vctp8q_m: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r1 +; CHECK-NEXT: vpst +; CHECK-NEXT: vctpt.8 r0 +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %a) + %3 = and <16 x i1> %1, %2 + %4 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %3) + %5 = trunc i32 %4 to i16 + ret i16 %5 +} + +define arm_aapcs_vfpcc zeroext i16 @test_vctp16q(i32 %a) { +; CHECK-LABEL: test_vctp16q: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vctp.16 r0 +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %a) + %1 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %0) + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +define arm_aapcs_vfpcc zeroext i16 @test_vctp16q_m(i32 %a, i16 zeroext %p) { +; CHECK-LABEL: test_vctp16q_m: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r1 +; CHECK-NEXT: vpst +; CHECK-NEXT: vctpt.16 r0 +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %a) + %3 = and <8 x i1> %1, %2 + %4 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %3) + %5 = trunc i32 %4 to i16 + ret i16 %5 +} + +define arm_aapcs_vfpcc zeroext i16 @test_vctp32q(i32 %a) { +; CHECK-LABEL: test_vctp32q: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vctp.32 r0 +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: bx lr +entry: + %0 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %a) + %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0) + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +define arm_aapcs_vfpcc zeroext i16 @test_vctp32q_m(i32 %a, i16 zeroext %p) { +; CHECK-LABEL: test_vctp32q_m: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r1 +; CHECK-NEXT: vpst +; CHECK-NEXT: vctpt.32 r0 +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %a) + %3 = and <4 x i1> %1, %2 + %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3) + %5 = trunc i32 %4 to i16 + ret i16 %5 +} + +define arm_aapcs_vfpcc 
zeroext i16 @test_vctp64q(i32 %a) { +; CHECK-LABEL: test_vctp64q: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vctp.64 r0 +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: bx lr +entry: + %0 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a) + %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0) + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +define arm_aapcs_vfpcc zeroext i16 @test_vctp64q_m(i32 %a, i16 zeroext %p) { +; CHECK-LABEL: test_vctp64q_m: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r1 +; CHECK-NEXT: vpst +; CHECK-NEXT: vctpt.64 r0 +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a) + %3 = and <4 x i1> %1, %2 + %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3) + %5 = trunc i32 %4 to i16 + ret i16 %5 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vpselq_i8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) #2 { +; CHECK-LABEL: test_vpselq_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vpselq_i16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) #2 { +; CHECK-LABEL: test_vpselq_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vpselq_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) #2 { +; CHECK-LABEL: test_vpselq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = select <8 x i1> %1, <8 x half> %a, <8 x half> %b + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vpselq_i32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) #2 { +; CHECK-LABEL: test_vpselq_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vpselq_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) #2 { +; CHECK-LABEL: test_vpselq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = select <4 x i1> %1, <4 x float> %a, <4 x float> %b + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <2 x i64> @test_vpselq_i64(<2 x i64> %a, <2 x i64> %b, i16 zeroext %p) #2 { +; CHECK-LABEL: test_vpselq_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = bitcast <2 x i64> %a to <4 x i32> + %3 = bitcast <2 x i64> %b to <4 x i32> + %4 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %3 + %5 = bitcast <4 x i32> %4 to <2 x i64> + ret <2 x i64> %5 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll 
b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll new file mode 100644 index 0000000000000..bafff00ea1de9 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vabdq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vabd.s32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b) + ret <4 x i32> %0 +} + +declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>) + +define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vabdq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vabd.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>) + +define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabdt.s8 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) + +declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) + +define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabdt.f16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) + +declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vandq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vandq.ll new file mode 100644 index 0000000000000..1b1d498bc378d --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vandq.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vandq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = and <16 x i8> %b, %a + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vandq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = and <4 x i32> %b, %a + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x i16> 
@test_vandq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vandq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = and <8 x i16> %b, %a + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vandq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vandq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = and <4 x i32> %1, %0 + %3 = bitcast <4 x i32> %2 to <4 x float> + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vandq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vandq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define arm_aapcs_vfpcc <8 x i16> @test_vandq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vandq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +; Function Attrs: nounwind readnone +define arm_aapcs_vfpcc <8 x half> @test_vandq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vandq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = zext i16 %p to i32 + %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) + %4 = bitcast <4 x float> %inactive to <4 x i32> + %5 = tail call <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> %4) + %6 = bitcast <4 x i32> %5 to <8 x half> + ret <8 x half> %6 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vbicq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vbicq.ll new file mode 100644 index 0000000000000..47877a13cb96e --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vbicq.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; 
CHECK-LABEL: test_vbicq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %1 = and <16 x i8> %0, %a + ret <16 x i8> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vbicq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1> + %1 = and <4 x i32> %0, %a + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vbicq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %1 = and <8 x i16> %0, %a + ret <8 x i16> %1 +} + +define arm_aapcs_vfpcc <4 x float> @test_vbicq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vbicq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1> + %3 = and <4 x i32> %2, %0 + %4 = bitcast <4 x i32> %3 to <4 x float> + ret <4 x float> %4 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vbicq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vbicq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.bic.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.bic.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vbicq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.bic.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.bic.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +; Function Attrs: nounwind readnone +define arm_aapcs_vfpcc <8 x half> @test_vbicq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vbicq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = zext i16 %p to i32 + %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) + %4 = bitcast <4 x float> %inactive to <4 x i32> + %5 = tail call <4 x i32> @llvm.arm.mve.bic.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> %4) + %6 = bitcast <4 x i32> %5 to <8 x half> + ret <8 x half> %6 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.bic.predicated.v4i32.v4i1(<4 x
i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/veorq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/veorq.ll new file mode 100644 index 0000000000000..9b66f3656eb27 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/veorq.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_veorq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = xor <16 x i8> %b, %a + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_veorq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = xor <4 x i32> %b, %a + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_veorq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = xor <8 x i16> %b, %a + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_veorq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_veorq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = xor <4 x i32> %1, %0 + %3 = bitcast <4 x i32> %2 to <4 x float> + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <16 x i8> @test_veorq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_veorq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: veort q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.eor.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.eor.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define arm_aapcs_vfpcc <8 x i16> @test_veorq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_veorq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: veort q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.eor.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.eor.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +; Function Attrs: nounwind readnone +define arm_aapcs_vfpcc <8 x half> @test_veorq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_veorq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: veort q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast 
<4 x float> %b to <4 x i32> + %2 = zext i16 %p to i32 + %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) + %4 = bitcast <4 x float> %inactive to <4 x i32> + %5 = tail call <4 x i32> @llvm.arm.mve.eor.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> %4) + %6 = bitcast <4 x i32> %5 to <8 x half> + ret <8 x half> %6 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.eor.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll new file mode 100644 index 0000000000000..d89308bb59412 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxnmq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %0 +} + +declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>) #1 + +define dso_local arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1 + +define dso_local arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxnmq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmaxnmt.f32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) #2 + +define dso_local arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxnmq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmaxnmt.f32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %b, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll new file mode 100644 index 0000000000000..09a7d60cd1650 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll @@ -0,0 +1,89 @@ 
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ugt <16 x i8> %a, %b + %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a + ret <16 x i8> %1 +} + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.s16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sgt <8 x i16> %a, %b + %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a + ret <8 x i16> %1 +} + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ugt <4 x i32> %a, %b + %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a + ret <4 x i32> %1 +} + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vmaxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vmaxq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s8 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vmaxq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vmaxq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vmaxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vmaxq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll new file mode 100644 index 
0000000000000..10cd674d39a8f --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminnmq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vminnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %0 +} + +declare <8 x half> @llvm.minnum.v8f16(<8 x half>, <8 x half>) #1 + +define dso_local arm_aapcs_vfpcc <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vminnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1 + +define dso_local arm_aapcs_vfpcc <8 x half> @test_vminnmq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminnmq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vminnmt.f32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) #2 + +define dso_local arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminnmq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vminnmt.f32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %b, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll new file mode 100644 index 0000000000000..0cbef86c928f7 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ugt <16 x i8> %a, %b + %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a + ret <16 x i8> %1 +} + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: 
test_vminq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.s16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sgt <8 x i16> %a, %b + %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a + ret <8 x i16> %1 +} + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ugt <4 x i32> %a, %b + %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a + ret <4 x i32> %1 +} + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s8 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll new file mode 100644 index 0000000000000..78ee17b554160 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vmulhq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmulhq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmulh.s8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <16 x i8> @llvm.arm.mve.vmulh.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %0 +} + +declare <16 x i8> 
@llvm.arm.mve.vmulh.v16i8(<16 x i8>, <16 x i8>) #1 + +define arm_aapcs_vfpcc <8 x i16> @test_vmulhq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmulhq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmulh.s16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x i16> @llvm.arm.mve.vmulh.v8i16(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %0 +} + +declare <8 x i16> @llvm.arm.mve.vmulh.v8i16(<8 x i16>, <8 x i16>) #1 + +define arm_aapcs_vfpcc <4 x i32> @test_vmulhq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmulhq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmulh.s32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vmulh.v4i32(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 +} + +declare <4 x i32> @llvm.arm.mve.vmulh.v4i32(<4 x i32>, <4 x i32>) #1 + +define arm_aapcs_vfpcc <16 x i8> @test_vmulhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmulhq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmulht.s8 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.mulh.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1 + +declare <16 x i8> @llvm.arm.mve.mulh.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #1 + +define arm_aapcs_vfpcc <8 x i16> @test_vmulhq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmulhq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmulht.s16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.mulh.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1 + +declare <8 x i16> @llvm.arm.mve.mulh.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1 + +define arm_aapcs_vfpcc <4 x i32> @test_vmulhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmulhq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmulht.s32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.mulh.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1 + +declare <4 x i32> @llvm.arm.mve.mulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulq.ll new file mode 100644 index 0000000000000..09d8e11a71aed --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulq.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @test_vmulq_u32(<4 x i32> %a, <4 x i32> %b) { 
+; CHECK-LABEL: test_vmulq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmul.i32 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = mul <4 x i32> %b, %a + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vmulq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmul.f32 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = fmul <4 x float> %b, %a + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vmulq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vmulq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmult.i8 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) + +declare <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) + +define arm_aapcs_vfpcc <8 x half> @test_vmulq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vmulq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmult.f16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) + +declare <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vornq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vornq.ll new file mode 100644 index 0000000000000..48f6a3cd23ad2 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vornq.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vornq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorn q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %1 = or <16 x i8> %0, %a + ret <16 x i8> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vornq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorn q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1> + %1 = or <4 x i32> %0, %a + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vornq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorn q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %1 = or <8 x i16> %0, %a + ret <8 x i16> %1 +} + +define arm_aapcs_vfpcc <4 x float> @test_vornq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vornq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorn q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1> + %3 = or <4 x i32> %2, %0 + %4 = bitcast <4 x
i32> %3 to <4 x float> + ret <4 x float> %4 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vornq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vornq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vornt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.orn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.orn.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define arm_aapcs_vfpcc <8 x i16> @test_vornq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vornq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vornt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.orn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.orn.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +; Function Attrs: nounwind readnone +define arm_aapcs_vfpcc <8 x half> @test_vornq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vornq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vornt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = zext i16 %p to i32 + %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) + %4 = bitcast <4 x float> %inactive to <4 x i32> + %5 = tail call <4 x i32> @llvm.arm.mve.orn.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> %4) + %6 = bitcast <4 x i32> %5 to <8 x half> + ret <8 x half> %6 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.orn.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vorrq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vorrq.ll new file mode 100644 index 0000000000000..ccb511a85e571 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vorrq.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vorrq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = or <16 x i8> %b, %a + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vorrq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = or <4 x i32> %b, %a + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vorrq_s16: +; CHECK: @ %bb.0: @ %entry 
+; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = or <8 x i16> %b, %a + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vorrq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vorrq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = or <4 x i32> %1, %0 + %3 = bitcast <4 x i32> %2 to <4 x float> + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vorrq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vorrq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vorrq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.orr.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.orr.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +; Function Attrs: nounwind readnone +define arm_aapcs_vfpcc <8 x half> @test_vorrq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vorrq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = zext i16 %p to i32 + %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) + %4 = bitcast <4 x float> %inactive to <4 x i32> + %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> %4) + %6 = bitcast <4 x i32> %5 to <8 x half> + ret <8 x half> %6 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll new file mode 100644 index 0000000000000..3975e4eca8727 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vrmulhq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vrmulhq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrmulh.s8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: 
+ %0 = tail call <16 x i8> @llvm.arm.mve.vrmulh.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %0 +} + +declare <16 x i8> @llvm.arm.mve.vrmulh.v16i8(<16 x i8>, <16 x i8>) #1 + +define arm_aapcs_vfpcc <8 x i16> @test_vrmulhq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vrmulhq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrmulh.s16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x i16> @llvm.arm.mve.vrmulh.v8i16(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %0 +} + +declare <8 x i16> @llvm.arm.mve.vrmulh.v8i16(<8 x i16>, <8 x i16>) #1 + +define arm_aapcs_vfpcc <4 x i32> @test_vrmulhq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vrmulhq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrmulh.s32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vrmulh.v4i32(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 +} + +declare <4 x i32> @llvm.arm.mve.vrmulh.v4i32(<4 x i32>, <4 x i32>) #1 + +define arm_aapcs_vfpcc <16 x i8> @test_vrmulhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vrmulhq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrmulht.s8 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.rmulh.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1 + +declare <16 x i8> @llvm.arm.mve.rmulh.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #1 + +define arm_aapcs_vfpcc <8 x i16> @test_vrmulhq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vrmulhq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrmulht.s16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.rmulh.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1 + +declare <8 x i16> @llvm.arm.mve.rmulh.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1 + +define arm_aapcs_vfpcc <4 x i32> @test_vrmulhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vrmulhq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrmulht.s32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.rmulh.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1 + +declare <4 x i32> @llvm.arm.mve.rmulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1 diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll new file mode 100644 index 0000000000000..ba3ef58c3c2eb --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll @@ -0,0 +1,2646 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
-mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE + +define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r3, r0, #3 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r3, r0, #2 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #508] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 508 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #512 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 512 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, 
zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_m508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -508 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_m512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #512 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -512 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrhu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r3, r0, #3 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> 
@llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #254] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #256 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_m254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #-254] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_m256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #256 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = 
icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r3, r0, #3 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #254] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #256 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_m254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #-254] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <4 x i16>* + %mask 
= load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_m256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #256 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #4] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: adds r3, r0, #3 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #254] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, 
zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #256 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_m254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #-254] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_m256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #256 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrbu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x 
i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #127] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #128 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_m127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #-127] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_m128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #128 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, 
i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #127] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #128 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x 
i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_m127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #-127] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_m128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #128 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #4] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = 
zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #127] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #128 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_m127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #-127] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_m128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #128 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #4] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> 
%c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #127] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #128 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_m127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #-127] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> 
undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_m128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #128 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu8_4(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #4] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_3(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #3] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_2(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #2] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_127(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #127] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* 
@ldrbu8_128(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #128 +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r3] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_m127(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #-127] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_m128(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #128 +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r3] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrwf32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r3, r0, #3 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_2: +; CHECK: @ %bb.0: @ 
%entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r3, r0, #2 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #508] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 508 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #512 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 512 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_m508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -508 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_m512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #512 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -512 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrhf16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 
q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #4] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: adds r3, r0, #3 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #254] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #256 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_m254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #-254] +; CHECK-NEXT: vstrh.16 q0, 
[r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_m256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #256 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + + + + +define i8* @strw32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: adds r1, r0, #3 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: adds r1, r0, #2 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #508] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x 
i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #512 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_m508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #-508] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_m512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #512 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: adds r1, r0, #3 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void 
@llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #2] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #254] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #256 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_m254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #-254] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_m256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #256 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_4: +; CHECK: @ 
%bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: adds r1, r0, #3 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #2] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #254] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #256 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_m254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #-254] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr 
inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_m256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #256 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #3] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #2] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_127(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #127] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void 
@llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #128 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_m127(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #-127] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_m128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #128 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #3] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; 
CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #2] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #127] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #128 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_m127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #-127] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_m128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #128 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb8_4(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x 
i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = load <16 x i8>, <16 x i8>* %0, align 1
+  %2 = bitcast i8* %z to <16 x i8>*
+  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strb8_3(i8* %y, i8* %x, <16 x i8> *%m) {
+; CHECK-LABEL: strb8_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r1]
+; CHECK-NEXT:    vldrb.u8 q1, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q1, zr
+; CHECK-NEXT:    vstrbt.8 q0, [r0, #3]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 3
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = load <16 x i8>, <16 x i8>* %0, align 1
+  %2 = bitcast i8* %z to <16 x i8>*
+  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strb8_2(i8* %y, i8* %x, <16 x i8> *%m) {
+; CHECK-LABEL: strb8_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r1]
+; CHECK-NEXT:    vldrb.u8 q1, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q1, zr
+; CHECK-NEXT:    vstrbt.8 q0, [r0, #2]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 2
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = load <16 x i8>, <16 x i8>* %0, align 1
+  %2 = bitcast i8* %z to <16 x i8>*
+  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strb8_127(i8* %y, i8* %x, <16 x i8> *%m) {
+; CHECK-LABEL: strb8_127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r1]
+; CHECK-NEXT:    vldrb.u8 q1, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q1, zr
+; CHECK-NEXT:    vstrbt.8 q0, [r0, #127]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 127
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = load <16 x i8>, <16 x i8>* %0, align 1
+  %2 = bitcast i8* %z to <16 x i8>*
+  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strb8_128(i8* %y, i8* %x, <16 x i8> *%m) {
+; CHECK-LABEL: strb8_128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r1]
+; CHECK-NEXT:    add.w r1, r0, #128
+; CHECK-NEXT:    vldrb.u8 q1, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q1, zr
+; CHECK-NEXT:    vstrbt.8 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 128
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = load <16 x i8>, <16 x i8>* %0, align 1
+  %2 = bitcast i8* %z to <16 x i8>*
+  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strb8_m127(i8* %y, i8* %x, <16 x i8> *%m) {
+; CHECK-LABEL: strb8_m127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r1]
+; CHECK-NEXT:    vldrb.u8 q1, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q1, zr
+; CHECK-NEXT:    vstrbt.8 q0, [r0, #-127]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 -127
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = load <16 x i8>, <16 x i8>* %0, align 1
+  %2 = bitcast i8* %z to <16 x i8>*
+  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strb8_m128(i8* %y, i8* %x, <16 x i8> *%m) {
+; CHECK-LABEL: strb8_m128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r1]
+; CHECK-NEXT:    sub.w r1, r0, #128
+; CHECK-NEXT:    vldrb.u8 q1, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q1, zr
+; CHECK-NEXT:    vstrbt.8 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 -128
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = load <16 x i8>, <16 x i8>* %0, align 1
+  %2 = bitcast i8* %z to <16 x i8>*
+  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strwf32_4(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strwf32_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r0, #4]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 4
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x float>, <4 x float>* %0, align 4
+  %2 = bitcast i8* %z to <4 x float>*
+  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strwf32_3(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strwf32_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    adds r1, r0, #3
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 3
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x float>, <4 x float>* %0, align 4
+  %2 = bitcast i8* %z to <4 x float>*
+  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strwf32_2(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strwf32_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    adds r1, r0, #2
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 2
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x float>, <4 x float>* %0, align 4
+  %2 = bitcast i8* %z to <4 x float>*
+  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strwf32_508(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strwf32_508:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r0, #508]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 508
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x float>, <4 x float>* %0, align 4
+  %2 = bitcast i8* %z to <4 x float>*
+  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strwf32_512(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strwf32_512:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    add.w r1, r0, #512
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 512
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x float>, <4 x float>* %0, align 4
+  %2 = bitcast i8* %z to <4 x float>*
+  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strwf32_m508(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strwf32_m508:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r0, #-508]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 -508
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x float>, <4 x float>* %0, align 4
+  %2 = bitcast i8* %z to <4 x float>*
+  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strwf32_m512(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strwf32_m512:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    sub.w r1, r0, #512
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 -512
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x float>, <4 x float>* %0, align 4
+  %2 = bitcast i8* %z to <4 x float>*
+  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strhf16_4(i8* %y, i8* %x, <8 x i16> *%m) {
+; CHECK-LABEL: strhf16_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-NEXT:    vldrh.u16 q1, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q1, zr
+; CHECK-NEXT:    vstrht.16 q0, [r0, #4]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 4
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = load <8 x half>, <8 x half>* %0, align 2
+  %2 = bitcast i8* %z to <8 x half>*
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strhf16_3(i8* %y, i8* %x, <8 x i16> *%m) {
+; CHECK-LABEL: strhf16_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-NEXT:    adds r1, r0, #3
+; CHECK-NEXT:    vldrh.u16 q1, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q1, zr
+; CHECK-NEXT:    vstrht.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 3
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = load <8 x half>, <8 x half>* %0, align 2
+  %2 = bitcast i8* %z to <8 x half>*
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strhf16_2(i8* %y, i8* %x, <8 x i16> *%m) {
+; CHECK-LABEL: strhf16_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-NEXT:    vldrh.u16 q1, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q1, zr
+; CHECK-NEXT:    vstrht.16 q0, [r0, #2]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 2
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = load <8 x half>, <8 x half>* %0, align 2
+  %2 = bitcast i8* %z to <8 x half>*
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strhf16_254(i8* %y, i8* %x, <8 x i16> *%m) {
+; CHECK-LABEL: strhf16_254:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-NEXT:    vldrh.u16 q1, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q1, zr
+; CHECK-NEXT:    vstrht.16 q0, [r0, #254]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 254
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = load <8 x half>, <8 x half>* %0, align 2
+  %2 = bitcast i8* %z to <8 x half>*
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strhf16_256(i8* %y, i8* %x, <8 x i16> *%m) {
+; CHECK-LABEL: strhf16_256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-NEXT:    add.w r1, r0, #256
+; CHECK-NEXT:    vldrh.u16 q1, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q1, zr
+; CHECK-NEXT:    vstrht.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 256
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = load <8 x half>, <8 x half>* %0, align 2
+  %2 = bitcast i8* %z to <8 x half>*
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strhf16_m254(i8* %y, i8* %x, <8 x i16> *%m) {
+; CHECK-LABEL: strhf16_m254:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-NEXT:    vldrh.u16 q1, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q1, zr
+; CHECK-NEXT:    vstrht.16 q0, [r0, #-254]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 -254
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = load <8 x half>, <8 x half>* %0, align 2
+  %2 = bitcast i8* %z to <8 x half>*
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
+  ret i8* %y
+}
+
+define i8* @strhf16_m256(i8* %y, i8* %x, <8 x i16> *%m) {
+; CHECK-LABEL: strhf16_m256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-NEXT:    sub.w r1, r0, #256
+; CHECK-NEXT:    vldrh.u16 q1, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q1, zr
+; CHECK-NEXT:    vstrht.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 -256
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = load <8 x half>, <8 x half>* %0, align 2
+  %2 = bitcast i8* %z to <8 x half>*
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
+  ret i8* %y
+}
+
+declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
+declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
+declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
+declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
+declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>)
+
+declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
+declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
+declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
+declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll
new file mode 100644
index 0000000000000..0951589eaa14c
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll
@@ -0,0 +1,2646 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
+
+define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0], #4
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+  %2 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %1, <4 x i32>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwu32_3(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0]
+; CHECK-NEXT:    adds r0, #3
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 3
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+  %2 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %1, <4 x i32>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwu32_2(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0]
+; CHECK-NEXT:    adds r0, #2
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 2
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+  %2 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %1, <4 x i32>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwu32_508(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_508:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0], #508
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 508
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+  %2 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %1, <4 x i32>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwu32_512(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_512:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0]
+; CHECK-NEXT:    add.w r0, r0, #512
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 512
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+  %2 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %1, <4 x i32>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwu32_m508(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_m508:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0], #-508
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -508
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+  %2 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %1, <4 x i32>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwu32_m512(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_m512:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0]
+; CHECK-NEXT:    sub.w r0, r0, #512
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -512
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+  %2 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %1, <4 x i32>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhu32_4(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.u32 q0, [r0], #4
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = zext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhu32_3(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.u32 q0, [r0]
+; CHECK-NEXT:    adds r0, #3
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 3
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = zext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhu32_2(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.u32 q0, [r0], #2
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 2
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = zext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhu32_254(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_254:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.u32 q0, [r0], #254
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 254
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = zext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhu32_256(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.u32 q0, [r0]
+; CHECK-NEXT:    add.w r0, r0, #256
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 256
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = zext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhu32_m254(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_m254:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.u32 q0, [r0], #-254
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -254
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = zext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhu32_m256(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_m256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.u32 q0, [r0]
+; CHECK-NEXT:    sub.w r0, r0, #256
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -256
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = zext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhs32_4(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.s32 q0, [r0], #4
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = sext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhs32_3(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.s32 q0, [r0]
+; CHECK-NEXT:    adds r0, #3
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 3
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = sext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhs32_2(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.s32 q0, [r0], #2
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 2
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = sext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhs32_254(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_254:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.s32 q0, [r0], #254
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 254
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = sext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhs32_256(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.s32 q0, [r0]
+; CHECK-NEXT:    add.w r0, r0, #256
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 256
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = sext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhs32_m254(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_m254:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.s32 q0, [r0], #-254
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -254
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = sext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhs32_m256(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_m256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrht.s32 q0, [r0]
+; CHECK-NEXT:    sub.w r0, r0, #256
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -256
+  %0 = bitcast i8* %x to <4 x i16>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+  %2 = sext <4 x i16> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhu16_4(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhu16_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0], #4
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <8 x i16>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef)
+  %2 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhu16_3(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhu16_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0]
+; CHECK-NEXT:    adds r0, #3
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 3
+  %0 = bitcast i8* %x to <8 x i16>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef)
+  %2 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhu16_2(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhu16_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0], #2
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 2
+  %0 = bitcast i8* %x to <8 x i16>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef)
+  %2 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhu16_254(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhu16_254:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0], #254
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 254
+  %0 = bitcast i8* %x to <8 x i16>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef)
+  %2 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhu16_256(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhu16_256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0]
+; CHECK-NEXT:    add.w r0, r0, #256
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 256
+  %0 = bitcast i8* %x to <8 x i16>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef)
+  %2 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhu16_m254(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhu16_m254:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0], #-254
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -254
+  %0 = bitcast i8* %x to <8 x i16>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef)
+  %2 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhu16_m256(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhu16_m256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0]
+; CHECK-NEXT:    sub.w r0, r0, #256
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -256
+  %0 = bitcast i8* %x to <8 x i16>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef)
+  %2 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbu32_4(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbu32_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u32 q0, [r0], #4
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = zext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbu32_3(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbu32_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u32 q0, [r0], #3
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 3
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = zext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbu32_2(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbu32_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u32 q0, [r0], #2
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 2
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = zext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbu32_127(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbu32_127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u32 q0, [r0], #127
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 127
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = zext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbu32_128(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbu32_128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u32 q0, [r0]
+; CHECK-NEXT:    adds r0, #128
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 128
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = zext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbu32_m127(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbu32_m127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u32 q0, [r0], #-127
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -127
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = zext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbu32_m128(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbu32_m128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u32 q0, [r0]
+; CHECK-NEXT:    subs r0, #128
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -128
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = zext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbs32_4(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbs32_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s32 q0, [r0], #4
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = sext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbs32_3(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbs32_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s32 q0, [r0], #3
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 3
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = sext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbs32_2(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbs32_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s32 q0, [r0], #2
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 2
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = sext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbs32_127(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbs32_127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s32 q0, [r0], #127
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 127
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = sext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbs32_128(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbs32_128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s32 q0, [r0]
+; CHECK-NEXT:    adds r0, #128
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 128
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = sext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbs32_m127(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbs32_m127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s32 q0, [r0], #-127
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -127
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = sext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbs32_m128(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrbs32_m128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s32 q0, [r0]
+; CHECK-NEXT:    subs r0, #128
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -128
+  %0 = bitcast i8* %x to <4 x i8>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef)
+  %2 = sext <4 x i8> %1 to <4 x i32>
+  %3 = bitcast i8* %y to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  ret i8* %z
+}
+
+define i8* @ldrbu16_4(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbu16_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u16 q0, [r0], #4
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = zext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbu16_3(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbu16_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u16 q0, [r0], #3
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 3
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = zext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbu16_2(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbu16_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u16 q0, [r0], #2
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 2
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = zext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbu16_127(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbu16_127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u16 q0, [r0], #127
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 127
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = zext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbu16_128(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbu16_128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u16 q0, [r0]
+; CHECK-NEXT:    adds r0, #128
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 128
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = zext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbu16_m127(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbu16_m127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u16 q0, [r0], #-127
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -127
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = zext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbu16_m128(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbu16_m128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u16 q0, [r0]
+; CHECK-NEXT:    subs r0, #128
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -128
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = zext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbs16_4(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbs16_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s16 q0, [r0], #4
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = sext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbs16_3(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbs16_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s16 q0, [r0], #3
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 3
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = sext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbs16_2(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbs16_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s16 q0, [r0], #2
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 2
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = sext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbs16_127(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbs16_127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s16 q0, [r0], #127
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 127
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = sext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbs16_128(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbs16_128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s16 q0, [r0]
+; CHECK-NEXT:    adds r0, #128
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 128
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = sext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbs16_m127(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbs16_m127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s16 q0, [r0], #-127
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -127
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = sext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbs16_m128(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrbs16_m128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrbt.s16 q0, [r0]
+; CHECK-NEXT:    subs r0, #128
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -128
+  %0 = bitcast i8* %x to <8 x i8>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef)
+  %2 = sext <8 x i8> %1 to <8 x i16>
+  %3 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %2, <8 x i16>* %3, align 2
+  ret i8* %z
+}
+
+define i8* @ldrbu8_4(i8* %x, i8* %y, <16 x i8> *%m) {
+; CHECK-LABEL: ldrbu8_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u8 q0, [r0], #4
+; CHECK-NEXT:    vstrb.8 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef)
+  %2 = bitcast i8* %y to <16 x i8>*
+  store <16 x i8> %1, <16 x i8>* %2, align 1
+  ret i8* %z
+}
+
+define i8* @ldrbu8_3(i8* %x, i8* %y, <16 x i8> *%m) {
+; CHECK-LABEL: ldrbu8_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u8 q0, [r0], #3
+; CHECK-NEXT:    vstrb.8 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 3
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef)
+  %2 = bitcast i8* %y to <16 x i8>*
+  store <16 x i8> %1, <16 x i8>* %2, align 1
+  ret i8* %z
+}
+
+define i8* @ldrbu8_2(i8* %x, i8* %y, <16 x i8> *%m) {
+; CHECK-LABEL: ldrbu8_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u8 q0, [r0], #2
+; CHECK-NEXT:    vstrb.8 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 2
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef)
+  %2 = bitcast i8* %y to <16 x i8>*
+  store <16 x i8> %1, <16 x i8>* %2, align 1
+  ret i8* %z
+}
+
+define i8* @ldrbu8_127(i8* %x, i8* %y, <16 x i8> *%m) {
+; CHECK-LABEL: ldrbu8_127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u8 q0, [r0], #127
+; CHECK-NEXT:    vstrb.8 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 127
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef)
+  %2 = bitcast i8* %y to <16 x i8>*
+  store <16 x i8> %1, <16 x i8>* %2, align 1
+  ret i8* %z
+}
+
+define i8* @ldrbu8_128(i8* %x, i8* %y, <16 x i8> *%m) {
+; CHECK-LABEL: ldrbu8_128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u8 q0, [r0]
+; CHECK-NEXT:    adds r0, #128
+; CHECK-NEXT:    vstrb.8 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 128
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef)
+  %2 = bitcast i8* %y to <16 x i8>*
+  store <16 x i8> %1, <16 x i8>* %2, align 1
+  ret i8* %z
+}
+
+define i8* @ldrbu8_m127(i8* %x, i8* %y, <16 x i8> *%m) {
+; CHECK-LABEL: ldrbu8_m127:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u8 q0, [r0], #-127
+; CHECK-NEXT:    vstrb.8 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -127
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef)
+  %2 = bitcast i8* %y to <16 x i8>*
+  store <16 x i8> %1, <16 x i8>* %2, align 1
+  ret i8* %z
+}
+
+define i8* @ldrbu8_m128(i8* %x, i8* %y, <16 x i8> *%m) {
+; CHECK-LABEL: ldrbu8_m128:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u8 q0, [r2]
+; CHECK-NEXT:    vpt.i8 ne, q0, zr
+; CHECK-NEXT:    vldrbt.u8 q0, [r0]
+; CHECK-NEXT:    subs r0, #128
+; CHECK-NEXT:    vstrb.8 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -128
+  %0 = bitcast i8* %x to <16 x i8>*
+  %mask = load <16 x i8>, <16 x i8>* %m, align 1
+  %c = icmp ne <16 x i8> %mask, zeroinitializer
+  %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef)
+  %2 = bitcast i8* %y to <16 x i8>*
+  store <16 x i8> %1, <16 x i8>* %2, align 1
+  ret i8* %z
+}
+
+define i8* @ldrwf32_4(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwf32_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0], #4
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef)
+  %2 = bitcast i8* %y to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwf32_3(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwf32_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0]
+; CHECK-NEXT:    adds r0, #3
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 3
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef)
+  %2 = bitcast i8* %y to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwf32_2(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwf32_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0]
+; CHECK-NEXT:    adds r0, #2
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 2
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef)
+  %2 = bitcast i8* %y to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwf32_508(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwf32_508:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0], #508
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 508
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef)
+  %2 = bitcast i8* %y to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwf32_512(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwf32_512:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0]
+; CHECK-NEXT:    add.w r0, r0, #512
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 512
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef)
+  %2 = bitcast i8* %y to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwf32_m508(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwf32_m508:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0], #-508
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -508
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef)
+  %2 = bitcast i8* %y to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrwf32_m512(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwf32_m512:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q0, zr
+; CHECK-NEXT:    vldrwt.u32 q0, [r0]
+; CHECK-NEXT:    sub.w r0, r0, #512
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -512
+  %0 = bitcast i8* %x to <4 x float>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef)
+  %2 = bitcast i8* %y to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  ret i8* %z
+}
+
+define i8* @ldrhf16_4(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhf16_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0], #4
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef)
+  %2 = bitcast i8* %y to <8 x half>*
+  store <8 x half> %1, <8 x half>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhf16_3(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhf16_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0]
+; CHECK-NEXT:    adds r0, #3
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 3
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef)
+  %2 = bitcast i8* %y to <8 x half>*
+  store <8 x half> %1, <8 x half>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhf16_2(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhf16_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0], #2
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 2
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef)
+  %2 = bitcast i8* %y to <8 x half>*
+  store <8 x half> %1, <8 x half>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhf16_254(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhf16_254:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0], #254
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 254
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef)
+  %2 = bitcast i8* %y to <8 x half>*
+  store <8 x half> %1, <8 x half>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhf16_256(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhf16_256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0]
+; CHECK-NEXT:    add.w r0, r0, #256
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 256
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef)
+  %2 = bitcast i8* %y to <8 x half>*
+  store <8 x half> %1, <8 x half>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhf16_m254(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhf16_m254:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0], #-254
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -254
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef)
+  %2 = bitcast i8* %y to <8 x half>*
+  store <8 x half> %1, <8 x half>* %2, align 2
+  ret i8* %z
+}
+
+define i8* @ldrhf16_m256(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhf16_m256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q0, zr
+; CHECK-NEXT:    vldrht.u16 q0, [r0]
+; CHECK-NEXT:    sub.w r0, r0, #256
+; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 -256
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef)
+  %2 = bitcast i8* %y to <8 x half>*
+  store <8 x half> %1, <8 x half>* %2, align 2
+  ret i8* %z
+}
+
+
+
+
+define i8* @strw32_4(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strw32_4:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r0], #4
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 4
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x i32>, <4 x i32>* %0, align 4
+  %2 = bitcast i8* %y to <4 x i32>*
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c)
+  ret i8* %z
+}
+
+define i8* @strw32_3(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strw32_3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r0]
+; CHECK-NEXT:    adds r0, #3
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 3
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x i32>, <4 x i32>* %0, align 4
+  %2 = bitcast i8* %y to <4 x i32>*
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c)
+  ret i8* %z
+}
+
+define i8* @strw32_2(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strw32_2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r0]
+; CHECK-NEXT:    adds r0, #2
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 2
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x i32>, <4 x i32>* %0, align 4
+  %2 = bitcast i8* %y to <4 x i32>*
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c)
+  ret i8* %z
+}
+
+define i8* @strw32_508(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strw32_508:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r0], #508
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 508
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x i32>, <4 x i32>* %0, align 4
+  %2 = bitcast i8* %y to <4 x i32>*
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c)
+  ret i8* %z
+}
+
+define i8* @strw32_512(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strw32_512:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r0]
+; CHECK-NEXT:    add.w r0, r0, #512
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 512
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x i32>, <4 x i32>* %0, align 4
+  %2 = bitcast i8* %y to <4 x i32>*
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c)
+  ret i8* %z
+}
+
+define i8* @strw32_m508(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strw32_m508:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r0], #-508
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 -508
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x i32>, <4 x i32>* %0, align 4
+  %2 = bitcast i8* %y to <4 x i32>*
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c)
+  ret i8* %z
+}
+
+define i8* @strw32_m512(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strw32_m512:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vldrw.u32 q1, [r2]
+; CHECK-NEXT:    vpt.i32 ne, q1, zr
+; CHECK-NEXT:    vstrwt.32 q0, [r0]
+; CHECK-NEXT:    sub.w r0, r0, #512
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 -512
+  %0 = bitcast i8* %x to <4 x i32>*
+  %mask = load <4 x i32>, <4 x i32>* %m, align 4
+  %c = icmp ne <4 x i32> %mask, zeroinitializer
+  %1 = load <4 x i32>, <4 x i32>* %0, align 4
+  %2 = bitcast i8* %y to <4 x i32>*
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c)
+
ret i8* %z +} + +define i8* @strh32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0], #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0], #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0], #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_m254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 
ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0], #-254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_m256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0], #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0], #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0], #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + 
%c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_m254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0], #-254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_m256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0], #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0], #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> 
%c) + ret i8* %z +} + +define i8* @strb32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0], #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_127(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0], #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_m127(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0], #-127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_m128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; 
CHECK-NEXT: vstrbt.16 q0, [r0], #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0], #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0], #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0], #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_m127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0], #-127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* 
%0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_m128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb8_4(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0], #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_3(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0], #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_2(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0], #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_127(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0], #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_128(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_128: +; CHECK: @ %bb.0: @ %entry +; 
CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_m127(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0], #-127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_m128(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0], #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: 
bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0], #508 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 512 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_m508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0], #-508 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_m512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -512 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0], #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, 
<8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0], #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0], #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_m254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0], #-254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void 
@llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_m256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) +declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>) +declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) +declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) +declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>) + +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>) +declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) +declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>) +declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll new file mode 100644 index 0000000000000..beb5aae634116 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll @@ -0,0 +1,2646 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE + +define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #4]! 
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 4
+ %0 = bitcast i8* %z to <4 x i32>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+ %2 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %1, <4 x i32>* %2, align 4
+ ret i8* %z
+}
+
+define i8* @ldrwu32_3(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_3:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adds r0, #3
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrwt.u32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 3
+ %0 = bitcast i8* %z to <4 x i32>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+ %2 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %1, <4 x i32>* %2, align 4
+ ret i8* %z
+}
+
+define i8* @ldrwu32_2(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_2:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adds r0, #2
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrwt.u32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 2
+ %0 = bitcast i8* %z to <4 x i32>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+ %2 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %1, <4 x i32>* %2, align 4
+ ret i8* %z
+}
+
+define i8* @ldrwu32_508(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_508:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrwt.u32 q0, [r0, #508]!
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 508
+ %0 = bitcast i8* %z to <4 x i32>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+ %2 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %1, <4 x i32>* %2, align 4
+ ret i8* %z
+}
+
+define i8* @ldrwu32_512(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_512:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: add.w r0, r0, #512
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrwt.u32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 512
+ %0 = bitcast i8* %z to <4 x i32>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+ %2 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %1, <4 x i32>* %2, align 4
+ ret i8* %z
+}
+
+define i8* @ldrwu32_m508(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_m508:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508]!
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 -508
+ %0 = bitcast i8* %z to <4 x i32>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+ %2 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %1, <4 x i32>* %2, align 4
+ ret i8* %z
+}
+
+define i8* @ldrwu32_m512(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrwu32_m512:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: sub.w r0, r0, #512
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrwt.u32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 -512
+ %0 = bitcast i8* %z to <4 x i32>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
+ %2 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %1, <4 x i32>* %2, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhu32_4(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_4:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.u32 q0, [r0, #4]!
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 4
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = zext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhu32_3(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_3:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adds r0, #3
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.u32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 3
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = zext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhu32_2(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_2:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.u32 q0, [r0, #2]!
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 2
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = zext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhu32_254(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_254:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.u32 q0, [r0, #254]!
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 254
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = zext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhu32_256(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_256:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: add.w r0, r0, #256
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.u32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 256
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = zext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhu32_m254(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_m254:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.u32 q0, [r0, #-254]!
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 -254
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = zext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhu32_m256(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhu32_m256:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: sub.w r0, r0, #256
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.u32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 -256
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = zext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhs32_4(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_4:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.s32 q0, [r0, #4]!
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 4
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = sext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhs32_3(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_3:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adds r0, #3
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.s32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 3
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = sext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhs32_2(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_2:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.s32 q0, [r0, #2]!
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 2
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = sext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhs32_254(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_254:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.s32 q0, [r0, #254]!
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 254
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = sext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhs32_256(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_256:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: add.w r0, r0, #256
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.s32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 256
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = sext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhs32_m254(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_m254:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.s32 q0, [r0, #-254]!
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 -254
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = sext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhs32_m256(i8* %x, i8* %y, <4 x i32> *%m) {
+; CHECK-LABEL: ldrhs32_m256:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: sub.w r0, r0, #256
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: vpt.i32 ne, q0, zr
+; CHECK-NEXT: vldrht.s32 q0, [r0]
+; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 -256
+ %0 = bitcast i8* %z to <4 x i16>*
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4
+ %c = icmp ne <4 x i32> %mask, zeroinitializer
+ %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef)
+ %2 = sext <4 x i16> %1 to <4 x i32>
+ %3 = bitcast i8* %y to <4 x i32>*
+ store <4 x i32> %2, <4 x i32>* %3, align 4
+ ret i8* %z
+}
+
+define i8* @ldrhu16_4(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhu16_4:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrh.u16 q0, [r2]
+; CHECK-NEXT: vpt.i16 ne, q0, zr
+; CHECK-NEXT: vldrht.u16 q0, [r0, #4]!
+; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #2]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #254]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_m254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #-254]! 
+; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_m256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrbu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #4]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #3]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #2]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #127]! 
+; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_m127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #-127]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_m128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #4]! 
+; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #3]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #2]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #127]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_m127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #-127]! 
+; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_m128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #4]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #3]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #2]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #127]! 
+; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_m127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #-127]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_m128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #4]! 
+; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #3]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #2]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #127]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_m127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #-127]! 
+; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_m128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu8_4(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #4]! +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_3(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #3]! +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_2(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #2]! +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_127(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #127]! 
+; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_128(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_m127(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #-127]! +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_m128(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrwf32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #4]! 
+; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #508]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 508 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 512 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_m508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508]! 
+; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -508 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_m512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -512 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrhf16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #4]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #2]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #254]! 
+; CHECK-NEXT: vstrh.16 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 254
+ %0 = bitcast i8* %z to <8 x half>*
+ %mask = load <8 x i16>, <8 x i16>* %m, align 2
+ %c = icmp ne <8 x i16> %mask, zeroinitializer
+ %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef)
+ %2 = bitcast i8* %y to <8 x half>*
+ store <8 x half> %1, <8 x half>* %2, align 2
+ ret i8* %z
+}
+
+define i8* @ldrhf16_256(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhf16_256:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: add.w r0, r0, #256
+; CHECK-NEXT: vldrh.u16 q0, [r2]
+; CHECK-NEXT: vpt.i16 ne, q0, zr
+; CHECK-NEXT: vldrht.u16 q0, [r0]
+; CHECK-NEXT: vstrh.16 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 256
+ %0 = bitcast i8* %z to <8 x half>*
+ %mask = load <8 x i16>, <8 x i16>* %m, align 2
+ %c = icmp ne <8 x i16> %mask, zeroinitializer
+ %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef)
+ %2 = bitcast i8* %y to <8 x half>*
+ store <8 x half> %1, <8 x half>* %2, align 2
+ ret i8* %z
+}
+
+define i8* @ldrhf16_m254(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhf16_m254:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrh.u16 q0, [r2]
+; CHECK-NEXT: vpt.i16 ne, q0, zr
+; CHECK-NEXT: vldrht.u16 q0, [r0, #-254]!
+; CHECK-NEXT: vstrh.16 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 -254
+ %0 = bitcast i8* %z to <8 x half>*
+ %mask = load <8 x i16>, <8 x i16>* %m, align 2
+ %c = icmp ne <8 x i16> %mask, zeroinitializer
+ %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef)
+ %2 = bitcast i8* %y to <8 x half>*
+ store <8 x half> %1, <8 x half>* %2, align 2
+ ret i8* %z
+}
+
+define i8* @ldrhf16_m256(i8* %x, i8* %y, <8 x i16> *%m) {
+; CHECK-LABEL: ldrhf16_m256:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: sub.w r0, r0, #256
+; CHECK-NEXT: vldrh.u16 q0, [r2]
+; CHECK-NEXT: vpt.i16 ne, q0, zr
+; CHECK-NEXT: vldrht.u16 q0, [r0]
+; CHECK-NEXT: vstrh.16 q0, [r1]
+; CHECK-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 -256
+ %0 = bitcast i8* %z to <8 x half>*
+ %mask = load <8 x i16>, <8 x i16>* %m, align 2
+ %c = icmp ne <8 x i16> %mask, zeroinitializer
+ %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef)
+ %2 = bitcast i8* %y to <8 x half>*
+ store <8 x half> %1, <8 x half>* %2, align 2
+ ret i8* %z
+}
+
+; The equivalent masked store tests. As the load tests above show, the
+; pre-increment (writeback) form is only used when the offset is a multiple
+; of the element size and in range: +/-127 for byte, +/-254 for halfword and
+; +/-508 for word accesses. Any other offset is materialised with a separate
+; add/sub first.
+
+define i8* @strw32_4(i8* %y, i8* %x, <4 x i32> *%m) {
+; CHECK-LABEL: strw32_4:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldrw.u32 q0, [r1]
+; CHECK-NEXT: vldrw.u32 q1, [r2]
+; CHECK-NEXT: vpt.i32 ne, q1, zr
+; CHECK-NEXT: vstrwt.32 q0, [r0, #4]!
+; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #508]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_m508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #-508]! 
+; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_m512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #4]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #2]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #254]! 
+; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_m254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #-254]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_m256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #4]! 
+; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #2]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #254]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_m254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #-254]! 
+; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_m256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #4]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #3]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #2]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_127(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #127]! 
+; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_m127(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #-127]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_m128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #4]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #3]! 
+; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #2]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #127]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_m127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #-127]! 
+; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_m128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb8_4(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #4]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_3(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #3]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_2(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #2]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_127(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #127]! 
+; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_128(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_m127(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #-127]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_m128(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #4]! 
+; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #508]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 512 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_m508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #-508]! 
+; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_m512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -512 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #4]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #2]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #254]! 
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 254
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = load <8 x half>, <8 x half>* %0, align 2
+  %2 = bitcast i8* %z to <8 x half>*
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
+  ret i8* %z
+}
+
+define i8* @strhf16_256(i8* %y, i8* %x, <8 x i16> *%m) {
+; CHECK-LABEL: strhf16_256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    add.w r0, r0, #256
+; CHECK-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-NEXT:    vldrh.u16 q1, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q1, zr
+; CHECK-NEXT:    vstrht.16 q0, [r0]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 256
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = load <8 x half>, <8 x half>* %0, align 2
+  %2 = bitcast i8* %z to <8 x half>*
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
+  ret i8* %z
+}
+
+define i8* @strhf16_m254(i8* %y, i8* %x, <8 x i16> *%m) {
+; CHECK-LABEL: strhf16_m254:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-NEXT:    vldrh.u16 q1, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q1, zr
+; CHECK-NEXT:    vstrht.16 q0, [r0, #-254]!
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 -254
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = load <8 x half>, <8 x half>* %0, align 2
+  %2 = bitcast i8* %z to <8 x half>*
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
+  ret i8* %z
+}
+
+define i8* @strhf16_m256(i8* %y, i8* %x, <8 x i16> *%m) {
+; CHECK-LABEL: strhf16_m256:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    sub.w r0, r0, #256
+; CHECK-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-NEXT:    vldrh.u16 q1, [r2]
+; CHECK-NEXT:    vpt.i16 ne, q1, zr
+; CHECK-NEXT:    vstrht.16 q0, [r0]
+; CHECK-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 -256
+  %0 = bitcast i8* %x to <8 x half>*
+  %mask = load <8 x i16>, <8 x i16>* %m, align 2
+  %c = icmp ne <8 x i16> %mask, zeroinitializer
+  %1 = load <8 x half>, <8 x half>* %0, align 2
+  %2 = bitcast i8* %z to <8 x half>*
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
+  ret i8* %z
+}
+
+declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
+declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
+declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
+declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
+declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>)
+
+declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
+declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
+declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
+declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>)
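+
+; A closing observation on the offsets exercised above (inferred from the
+; CHECK lines; the exact encoding rules live in the ARM backend, not in this
+; file): the MVE pre/post-increment loads and stores take what is consistent
+; with a 7-bit immediate scaled by the element size, so the largest offsets
+; that fold into the instruction are +/-127 for the .8, +/-254 for the .16 and
+; +/-508 for the .32 forms. One step past each bound (128, 256, 512) no longer
+; folds, and the tests expect a separate adds/add.w or subs/sub.w on the base
+; register instead, e.g.:
+;
+;   vstrht.16 q0, [r0, #254]!   @ 254 = 127 * 2, folds into the instruction
+;
+;   add.w r0, r0, #256          @ 256 is out of range, so the address is
+;   vstrht.16 q0, [r0]          @ materialised first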
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
index 46b64c8e4d8b1..100a082fd12be 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
@@ -13,8 +13,8 @@ define void @foo_v4i32_v4i32(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i32> *%src
 entry:
   %0 = load <4 x i32>, <4 x i32>* %mask, align 4
   %1 = icmp sgt <4 x i32> %0, zeroinitializer
-  %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
-  call void @llvm.masked.store.v4i32(<4 x i32> %2, <4 x i32>* %dest, i32 4, <4 x i1> %1)
+  %2 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %2, <4 x i32>* %dest, i32 4, <4 x i1> %1)
   ret void
 }
 
@@ -29,9 +29,9 @@ define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%
 entry:
   %0 = load <4 x i32>, <4 x i32>* %mask, align 4
   %1 = icmp sgt <4 x i32> %0, zeroinitializer
-  %2 = call <4 x i8> @llvm.masked.load.v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef)
+  %2 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef)
   %3 = sext <4 x i8> %2 to <4 x i32>
-  call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
   ret void
 }
 
@@ -46,9 +46,9 @@ define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16>
 entry:
   %0 = load <4 x i32>, <4 x i32>* %mask, align 4
   %1 = icmp sgt <4 x i32> %0, zeroinitializer
-  %2 = call <4 x i16> @llvm.masked.load.v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef)
+  %2 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef)
   %3 = sext <4 x i16> %2 to <4 x i32>
-  call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
   ret void
 }
 
@@ -63,9 +63,9 @@ define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%
 entry:
   %0 = load <4 x i32>, <4 x i32>* %mask, align 4
   %1 = icmp sgt <4 x i32> %0, zeroinitializer
-  %2 = call <4 x i8> @llvm.masked.load.v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef)
+  %2 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef)
   %3 = zext <4 x i8> %2 to <4 x i32>
-  call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
   ret void
 }
 
@@ -80,9 +80,9 @@ define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16>
 entry:
   %0 = load <4 x i32>, <4 x i32>* %mask, align 4
   %1 = icmp sgt <4 x i32> %0, zeroinitializer
-  %2 = call <4 x i16> @llvm.masked.load.v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef)
+  %2 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef)
   %3 = zext <4 x i16> %2 to <4 x i32>
-  call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1)
   ret void
 }
 
@@ -234,9 +234,9 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
 entry:
   %0 = load <2 x i32>, <2 x i32>* %mask, align 4
   %1 = icmp sgt <2 x i32> %0, zeroinitializer
-  %2 = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %src, i32 4, <2 x i1> %1, <2 x i32> undef)
+  %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 4, <2 x i1> %1, <2 x i32> undef)
   %3 = sext <2 x i32> %2 to <2 x i64>
-  call void @llvm.masked.store.v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 8, <2 x i1> %1)
+  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 8, <2 x i1> %1)
   ret void
 }
 
@@ -392,9 +392,9 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
 entry:
   %0 = load <2 x i32>, <2 x i32>* %mask, align 4
   %1 = icmp sgt <2 x i32> %0, zeroinitializer
-  %2 = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %src, i32 2, <2 x i1> %1, <2 x i32> undef)
+  %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 2, <2 x i1> %1, <2 x i32> undef)
   %3 = sext <2 x i32> %2 to <2 x i64>
-  call void @llvm.masked.store.v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 4, <2 x i1> %1)
+  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 4, <2 x i1> %1)
   ret void
 }
 
@@ -549,9 +549,9 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32>
 entry:
   %0 = load <2 x i32>, <2 x i32>* %mask, align 4
   %1 = icmp sgt <2 x i32> %0, zeroinitializer
-  %2 = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %src, i32 4, <2 x i1> %1, <2 x i32> undef)
+  %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 4, <2 x i1> %1, <2 x i32> undef)
   %3 = zext <2 x i32> %2 to <2 x i64>
-  call void @llvm.masked.store.v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 8, <2 x i1> %1)
+  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 8, <2 x i1> %1)
   ret void
 }
 
@@ -710,9 +710,9 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask,
 entry:
   %0 = load <2 x i32>, <2 x i32>* %mask, align 4
   %1 = icmp sgt <2 x i32> %0, zeroinitializer
-  %2 = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %src, i32 2, <2 x i1> %1, <2 x i32> undef)
+  %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 2, <2 x i1> %1, <2 x i32> undef)
   %3 = zext <2 x i32> %2 to <2 x i64>
-  call void @llvm.masked.store.v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 4, <2 x i1> %1)
+  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 4, <2 x i1> %1)
   ret void
 }
 
@@ -727,8 +727,8 @@ define void @foo_v8i16_v8i16(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i16> *%src
 entry:
   %0 = load <8 x i16>, <8 x i16>* %mask, align 2
   %1 = icmp sgt <8 x i16> %0, zeroinitializer
-  %2 = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef)
-  call void @llvm.masked.store.v8i16(<8 x i16> %2, <8 x i16>* %dest, i32 2, <8 x i1> %1)
+  %2 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef)
+  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %2, <8 x i16>* %dest, i32 2, <8 x i1> %1)
   ret void
 }
 
@@ -743,9 +743,9 @@ define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%
 entry:
   %0 = load <8 x i16>, <8 x i16>* %mask, align 2
   %1 = icmp sgt <8 x i16> %0, zeroinitializer
-  %2 = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef)
+  %2 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef)
   %3 = sext <8 x i8> %2 to <8 x i16>
-  call void @llvm.masked.store.v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1)
+  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1)
   ret void
 }
 
@@ -760,9 +760,9 @@ define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%
 entry:
   %0 = load <8 x i16>, <8 x i16>* %mask, align 2
   %1 = icmp sgt <8 x i16> %0, zeroinitializer
-  %2 = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef)
+  %2 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef)
   %3 = zext <8 x i8> %2 to <8 x i16>
-  call void @llvm.masked.store.v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1)
+  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1)
   ret void
 }
 
@@ -777,8 +777,8 @@ define void @foo_v16i8_v16i8(<16 x i8> *%dest, <16 x i8> *%mask, <16 x i8> *%src
 entry:
   %0 = load <16 x i8>, <16 x i8>* %mask, align 1
   %1 = icmp sgt <16 x i8> %0, zeroinitializer
-  %2 = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %src, i32 1, <16 x i1> %1, <16 x i8> undef)
-  call void @llvm.masked.store.v16i8(<16 x i8> %2, <16 x i8>* %dest, i32 1, <16 x i1> %1)
+  %2 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %src, i32 1, <16 x i1> %1, <16 x i8> undef)
+  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %2, <16 x i8>* %dest, i32 1, <16 x i1> %1)
   ret void
 }
 
@@ -793,9 +793,9 @@ define void @foo_trunc_v8i8_v8i16(<8 x i8> *%dest, <8 x i16> *%mask, <8 x i16> *
 entry:
   %0 = load <8 x i16>, <8 x i16>* %mask, align 2
   %1 = icmp sgt <8 x i16> %0, zeroinitializer
-  %2 = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef)
+  %2 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef)
   %3 = trunc <8 x i16> %2 to <8 x i8>
-  call void @llvm.masked.store.v8i8(<8 x i8> %3, <8 x i8>* %dest, i32 1, <8 x i1> %1)
+  call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %3, <8 x i8>* %dest, i32 1, <8 x i1> %1)
   ret void
 }
 
@@ -810,9 +810,9 @@ define void @foo_trunc_v4i8_v4i32(<4 x i8> *%dest, <4 x i32> *%mask, <4 x i32> *
 entry:
   %0 = load <4 x i32>, <4 x i32>* %mask, align 4
   %1 = icmp sgt <4 x i32> %0, zeroinitializer
-  %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
+  %2 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
   %3 = trunc <4 x i32> %2 to <4 x i8>
-  call void @llvm.masked.store.v4i8(<4 x i8> %3, <4 x i8>* %dest, i32 1, <4 x i1> %1)
+  call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %3, <4 x i8>* %dest, i32 1, <4 x i1> %1)
   ret void
 }
 
@@ -827,9 +827,9 @@ define void @foo_trunc_v4i16_v4i32(<4 x i16> *%dest, <4 x i32> *%mask, <4 x i32>
 entry:
   %0 = load <4 x i32>, <4 x i32>* %mask, align 4
   %1 = icmp sgt <4 x i32> %0, zeroinitializer
-  %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
+  %2 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef)
   %3 = trunc <4 x i32> %2 to <4 x i16>
-  call void @llvm.masked.store.v4i16(<4 x i16> %3, <4 x i16>* %dest, i32 2, <4 x i1> %1)
+  call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %3, <4 x i16>* %dest, i32 2, <4 x i1> %1)
   ret void
 }
 
@@ -844,8 +844,8 @@ define void @foo_v4f32_v4f32(<4 x float> *%dest, <4 x i32> *%mask, <4 x float> *
 entry:
   %0 = load <4 x i32>, <4 x i32>* %mask, align 4
   %1 = icmp sgt <4 x i32> %0, zeroinitializer
-  %2 = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %src, i32 4, <4 x i1> %1, <4 x float> undef)
-  call void @llvm.masked.store.v4f32(<4 x float> %2, <4 x float>* %dest, i32 4, <4 x i1> %1)
+  %2 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %src, i32 4, <4 x i1> %1, <4 x float> undef)
+  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %2, <4 x float>* %dest, i32 4, <4 x i1> %1)
   ret void
 }
 
@@ -860,8 +860,8 @@ define void @foo_v8f16_v8f16(<8 x half> *%dest, <8 x i16> *%mask, <8 x half> *%s
 entry:
   %0 = load <8 x i16>, <8 x i16>* %mask, align 2
   %1 = icmp sgt <8 x i16> %0, zeroinitializer
-  %2 = call <8 x half> @llvm.masked.load.v8f16(<8 x half>* %src, i32 2, <8 x i1> %1, <8 x half> undef)
-  call void @llvm.masked.store.v8f16(<8 x half> %2, <8 x half>* %dest, i32 2, <8 x i1> %1)
+  %2 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %src, i32 2, <8 x i1> %1, <8 x half> undef)
+  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %2, <8 x half>* %dest, i32 2, <8 x i1> %1)
   ret void
 }
 
@@ -991,9 +991,9 @@ define void @foo_v4f32_v4f16(<4 x float> *%dest, <4 x i16> *%mask, <4 x half> *%
 entry:
   %0 = load <4 x i16>, <4 x i16>* %mask, align 2
   %1 = icmp sgt <4 x i16> %0, zeroinitializer
-  %2 = call <4 x half> @llvm.masked.load.v4f16(<4 x half>* %src, i32 2, <4 x i1> %1, <4 x half> undef)
+  %2 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* %src, i32 2, <4 x i1> %1, <4 x half> undef)
   %3 = fpext <4 x half> %2 to <4 x float>
-  call void @llvm.masked.store.v4f32(<4 x float> %3, <4 x float>* %dest, i32 2, <4 x i1> %1)
+  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %dest, i32 2, <4 x i1> %1)
   ret void
 }
 
@@ -1123,29 +1123,29 @@ define void @foo_v4f32_v4f16_unaligned(<4 x float> *%dest, <4 x i16> *%mask, <4
 entry:
   %0 = load <4 x i16>, <4 x i16>* %mask, align 2
   %1 = icmp sgt <4 x i16> %0, zeroinitializer
-  %2 = call <4 x half> @llvm.masked.load.v4f16(<4 x half>* %src, i32 2, <4 x i1> %1, <4 x half> undef)
+  %2 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* %src, i32 2, <4 x i1> %1, <4 x half> undef)
   %3 = fpext <4 x half> %2 to <4 x float>
-  call void @llvm.masked.store.v4f32(<4 x float> %3, <4 x float>* %dest, i32 1, <4 x i1> %1)
+  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %dest, i32 1, <4 x i1> %1)
   ret void
 }
 
-declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
-declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
-declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
-declare <2 x i32> @llvm.masked.load.v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
-declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
-declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
-declare <4 x half> @llvm.masked.load.v4f16(<4 x half>*, i32, <4 x i1>, <4 x half>)
-declare <8 x half> @llvm.masked.load.v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>)
+declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
+declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
+declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>)
+declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
+declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
+declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
+declare <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>*, i32, <4 x i1>, <4 x half>)
+declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>)
 
-declare void @llvm.masked.store.v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
-declare <4 x i16> @llvm.masked.load.v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
-declare <4 x i8> @llvm.masked.load.v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)
-declare <8 x i8> @llvm.masked.load.v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
+declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
+declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
+declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
+declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
index e75e07604e879..54a94b8981c2e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
@@ -468,8 +468,7 @@ define arm_aapcs_vfpcc i8* @masked_v4i32_preinc(i8* %x, i8* %y, <4 x i32> %a) {
 ; CHECK-LE-LABEL: masked_v4i32_preinc:
 ; CHECK-LE:       @ %bb.0: @ %entry
 ; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
-; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0, #4]
-; CHECK-LE-NEXT:    adds r0, #4
+; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0, #4]!
 ; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
 ; CHECK-LE-NEXT:    bx lr
 ;
@@ -477,8 +476,7 @@ define arm_aapcs_vfpcc i8* @masked_v4i32_preinc(i8* %x, i8* %y, <4 x i32> %a) {
 ; CHECK-BE:       @ %bb.0: @ %entry
 ; CHECK-BE-NEXT:    vrev64.32 q1, q0
 ; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
-; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0, #4]
-; CHECK-BE-NEXT:    adds r0, #4
+; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0, #4]!
; CHECK-BE-NEXT: vstrw.32 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -495,8 +493,7 @@ define arm_aapcs_vfpcc i8* @masked_v4i32_postinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-LE-LABEL: masked_v4i32_postinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0], #4 ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -504,8 +501,7 @@ define arm_aapcs_vfpcc i8* @masked_v4i32_postinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q1, zr -; CHECK-BE-NEXT: vldrwt.u32 q0, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0], #4 ; CHECK-BE-NEXT: vstrw.32 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1032,8 +1028,7 @@ define i8* @masked_v8i16_preinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-LE-NEXT: vldr d1, [sp] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4]! ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1043,8 +1038,7 @@ define i8* @masked_v8i16_preinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q1, zr -; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4]! ; CHECK-BE-NEXT: vstrh.16 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1061,8 +1055,7 @@ define arm_aapcs_vfpcc i8* @masked_v8i16_postinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-LE-LABEL: masked_v8i16_postinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vldrht.u16 q0, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrht.u16 q0, [r0], #4 ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1070,8 +1063,7 @@ define arm_aapcs_vfpcc i8* @masked_v8i16_postinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q1, zr -; CHECK-BE-NEXT: vldrht.u16 q0, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrht.u16 q0, [r0], #4 ; CHECK-BE-NEXT: vstrh.16 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1151,8 +1143,7 @@ define arm_aapcs_vfpcc i8* @masked_v16i8_preinc(i8* %x, i8* %y, <16 x i8> %a) { ; CHECK-LE-LABEL: masked_v16i8_preinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s8 gt, q0, zr -; CHECK-LE-NEXT: vldrbt.u8 q0, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0, #4]! ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1160,8 +1151,7 @@ define arm_aapcs_vfpcc i8* @masked_v16i8_preinc(i8* %x, i8* %y, <16 x i8> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vpt.s8 gt, q1, zr -; CHECK-BE-NEXT: vldrbt.u8 q0, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0, #4]! 
; CHECK-BE-NEXT: vstrb.8 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1178,8 +1168,7 @@ define arm_aapcs_vfpcc i8* @masked_v16i8_postinc(i8* %x, i8* %y, <16 x i8> %a) { ; CHECK-LE-LABEL: masked_v16i8_postinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s8 gt, q0, zr -; CHECK-LE-NEXT: vldrbt.u8 q0, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0], #4 ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1187,8 +1176,7 @@ define arm_aapcs_vfpcc i8* @masked_v16i8_postinc(i8* %x, i8* %y, <16 x i8> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vpt.s8 gt, q1, zr -; CHECK-BE-NEXT: vldrbt.u8 q0, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0], #4 ; CHECK-BE-NEXT: vstrb.8 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1355,8 +1343,7 @@ define arm_aapcs_vfpcc i8* @masked_v4f32_preinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-LE-LABEL: masked_v4f32_preinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vldrwt.u32 q0, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0, #4]! ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1364,8 +1351,7 @@ define arm_aapcs_vfpcc i8* @masked_v4f32_preinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q1, zr -; CHECK-BE-NEXT: vldrwt.u32 q0, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0, #4]! ; CHECK-BE-NEXT: vstrw.32 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1382,8 +1368,7 @@ define arm_aapcs_vfpcc i8* @masked_v4f32_postinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-LE-LABEL: masked_v4f32_postinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0], #4 ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1391,8 +1376,7 @@ define arm_aapcs_vfpcc i8* @masked_v4f32_postinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q1, zr -; CHECK-BE-NEXT: vldrwt.u32 q0, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0], #4 ; CHECK-BE-NEXT: vstrw.32 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1724,8 +1708,7 @@ define arm_aapcs_vfpcc i8* @masked_v8f16_preinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-LE-LABEL: masked_v8f16_preinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4]! ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1733,8 +1716,7 @@ define arm_aapcs_vfpcc i8* @masked_v8f16_preinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q1, zr -; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4]! 
; CHECK-BE-NEXT: vstrh.16 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1751,8 +1733,7 @@ define arm_aapcs_vfpcc i8* @masked_v8f16_postinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-LE-LABEL: masked_v8f16_postinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vldrht.u16 q0, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrht.u16 q0, [r0], #4 ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1760,8 +1741,7 @@ define arm_aapcs_vfpcc i8* @masked_v8f16_postinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q1, zr -; CHECK-BE-NEXT: vldrht.u16 q0, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrht.u16 q0, [r0], #4 ; CHECK-BE-NEXT: vstrh.16 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll index 1fc9793fd50d4..425162721acf4 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll @@ -111,8 +111,7 @@ define i8* @masked_v4i32_pre(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4]! ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v4i32_pre: @@ -122,8 +121,7 @@ define i8* @masked_v4i32_pre(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.32 q2, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q2, zr -; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4]! ; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -142,8 +140,7 @@ define i8* @masked_v4i32_post(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vstrwt.32 q1, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrwt.32 q1, [r0], #4 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v4i32_post: @@ -153,8 +150,7 @@ define i8* @masked_v4i32_post(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.32 q2, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q2, zr -; CHECK-BE-NEXT: vstrwt.32 q1, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrwt.32 q1, [r0], #4 ; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -334,8 +330,7 @@ define i8* @masked_v8i16_pre(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vstrht.16 q1, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrht.16 q1, [r0, #4]! ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v8i16_pre: @@ -345,8 +340,7 @@ define i8* @masked_v8i16_pre(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.16 q2, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q2, zr -; CHECK-BE-NEXT: vstrht.16 q1, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrht.16 q1, [r0, #4]! 
; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -365,8 +359,7 @@ define i8* @masked_v8i16_post(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vstrht.16 q1, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrht.16 q1, [r0], #4 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v8i16_post: @@ -376,8 +369,7 @@ define i8* @masked_v8i16_post(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.16 q2, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q2, zr -; CHECK-BE-NEXT: vstrht.16 q1, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrht.16 q1, [r0], #4 ; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -416,8 +408,7 @@ define i8* @masked_v16i8_pre(i8* %y, i8* %x, <16 x i8> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s8 gt, q0, zr -; CHECK-LE-NEXT: vstrbt.8 q1, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrbt.8 q1, [r0, #4]! ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v16i8_pre: @@ -427,8 +418,7 @@ define i8* @masked_v16i8_pre(i8* %y, i8* %x, <16 x i8> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.8 q2, q0 ; CHECK-BE-NEXT: vpt.s8 gt, q2, zr -; CHECK-BE-NEXT: vstrbt.8 q1, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrbt.8 q1, [r0, #4]! ; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -447,8 +437,7 @@ define i8* @masked_v16i8_post(i8* %y, i8* %x, <16 x i8> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s8 gt, q0, zr -; CHECK-LE-NEXT: vstrbt.8 q1, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrbt.8 q1, [r0], #4 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v16i8_post: @@ -458,8 +447,7 @@ define i8* @masked_v16i8_post(i8* %y, i8* %x, <16 x i8> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.8 q2, q0 ; CHECK-BE-NEXT: vpt.s8 gt, q2, zr -; CHECK-BE-NEXT: vstrbt.8 q1, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrbt.8 q1, [r0], #4 ; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -591,8 +579,7 @@ define i8* @masked_v4f32_pre(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4]! ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v4f32_pre: @@ -602,8 +589,7 @@ define i8* @masked_v4f32_pre(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.32 q2, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q2, zr -; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4]! 
 ; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
@@ -622,8 +608,7 @@ define i8* @masked_v4f32_post(i8* %y, i8* %x, <4 x i32> %a) {
 ; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
 ; CHECK-LE-NEXT:    vmov d0, r2, r3
 ; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
-; CHECK-LE-NEXT:    vstrwt.32 q1, [r0]
-; CHECK-LE-NEXT:    adds r0, #4
+; CHECK-LE-NEXT:    vstrwt.32 q1, [r0], #4
 ; CHECK-LE-NEXT:    bx lr
 ;
 ; CHECK-BE-LABEL: masked_v4f32_post:
@@ -633,8 +618,7 @@ define i8* @masked_v4f32_post(i8* %y, i8* %x, <4 x i32> %a) {
 ; CHECK-BE-NEXT:    vmov d0, r3, r2
 ; CHECK-BE-NEXT:    vrev64.32 q2, q0
 ; CHECK-BE-NEXT:    vpt.s32 gt, q2, zr
-; CHECK-BE-NEXT:    vstrwt.32 q1, [r0]
-; CHECK-BE-NEXT:    adds r0, #4
+; CHECK-BE-NEXT:    vstrwt.32 q1, [r0], #4
 ; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
@@ -904,8 +888,7 @@ define i8* @masked_v8f16_pre(i8* %y, i8* %x, <8 x i16> %a) {
 ; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
 ; CHECK-LE-NEXT:    vmov d0, r2, r3
 ; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
-; CHECK-LE-NEXT:    vstrht.16 q1, [r0, #4]
-; CHECK-LE-NEXT:    adds r0, #4
+; CHECK-LE-NEXT:    vstrht.16 q1, [r0, #4]!
 ; CHECK-LE-NEXT:    bx lr
 ;
 ; CHECK-BE-LABEL: masked_v8f16_pre:
@@ -915,8 +898,7 @@ define i8* @masked_v8f16_pre(i8* %y, i8* %x, <8 x i16> %a) {
 ; CHECK-BE-NEXT:    vmov d0, r3, r2
 ; CHECK-BE-NEXT:    vrev64.16 q2, q0
 ; CHECK-BE-NEXT:    vpt.s16 gt, q2, zr
-; CHECK-BE-NEXT:    vstrht.16 q1, [r0, #4]
-; CHECK-BE-NEXT:    adds r0, #4
+; CHECK-BE-NEXT:    vstrht.16 q1, [r0, #4]!
 ; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
@@ -935,8 +917,7 @@ define i8* @masked_v8f16_post(i8* %y, i8* %x, <8 x i16> %a) {
 ; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
 ; CHECK-LE-NEXT:    vmov d0, r2, r3
 ; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
-; CHECK-LE-NEXT:    vstrht.16 q1, [r0]
-; CHECK-LE-NEXT:    adds r0, #4
+; CHECK-LE-NEXT:    vstrht.16 q1, [r0], #4
 ; CHECK-LE-NEXT:    bx lr
 ;
 ; CHECK-BE-LABEL: masked_v8f16_post:
@@ -946,8 +927,7 @@ define i8* @masked_v8f16_post(i8* %y, i8* %x, <8 x i16> %a) {
 ; CHECK-BE-NEXT:    vmov d0, r3, r2
 ; CHECK-BE-NEXT:    vrev64.16 q2, q0
 ; CHECK-BE-NEXT:    vpt.s16 gt, q2, zr
-; CHECK-BE-NEXT:    vstrht.16 q1, [r0]
-; CHECK-BE-NEXT:    adds r0, #4
+; CHECK-BE-NEXT:    vstrht.16 q1, [r0], #4
 ; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
index 22483aac109e8..79700e046f0ef 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -2609,3 +2609,2614 @@ entry:
   %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
   ret <8 x half> %s
 }
+
+
+; Reversed
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_oeq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_oeq_v4f32:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it eq
+; CHECK-MVE-NEXT:    moveq r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
+; CHECK-MVE-NEXT:    it eq
+; CHECK-MVE-NEXT:    moveq r2, #1
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cset r2, ne
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    mov.w r3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    it eq
+; CHECK-MVE-NEXT:    moveq r3, #1
+; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    cset r3, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it eq
+; CHECK-MVE-NEXT:    moveq r0, #1
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT:    lsls r0, r3, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT:    lsls r0, r2, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT:    lsls r0, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_oeq_v4f32:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vmov r0, s4
+; CHECK-MVEFP-NEXT:    vcmp.f32 eq, q0, r0
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %i = insertelement <4 x float> undef, float %src2, i32 0
+  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+  %c = fcmp oeq <4 x float> %sp, %src
+  %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_one_v4f32:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    it mi
+; CHECK-MVE-NEXT:    movmi r1, #1
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it mi
+; CHECK-MVE-NEXT:    movmi r2, #1
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r2, #1
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    mov.w r3, #0
+; CHECK-MVE-NEXT:    cset r2, ne
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it mi
+; CHECK-MVE-NEXT:    movmi r3, #1
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r3, #1
+; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    mov.w r0, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    cset r3, ne
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it mi
+; CHECK-MVE-NEXT:    movmi r0, #1
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r0, #1
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT:    lsls r0, r3, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT:    lsls r0, r2, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT:    lsls r0, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_one_v4f32:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vmov r0, s4
+; CHECK-MVEFP-NEXT:    vpt.f32 le, q0, r0
+; CHECK-MVEFP-NEXT:    vcmpt.f32 ge, q0, r0
+; CHECK-MVEFP-NEXT:    vpnot
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %i = insertelement <4 x float> undef, float %src2, i32 0
+  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+  %c = fcmp one <4 x float> %sp, %src
+  %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_ogt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ogt_v4f32:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r2, #1
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cset r2, ne
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    mov.w r3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r3, #1
+; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    cset r3, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it gt
+; CHECK-MVE-NEXT:    movgt r0, #1
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT:    lsls r0, r3, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT:    lsls r0, r2, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT:    lsls r0, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_ogt_v4f32:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vmov r0, s4
+; CHECK-MVEFP-NEXT:    vcmp.f32 lt, q0, r0
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %i = insertelement <4 x float> undef, float %src2, i32 0
+  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+  %c = fcmp ogt <4 x float> %sp, %src
+  %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_oge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_oge_v4f32:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s0
+; CHECK-MVE-NEXT:    movs r1, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it ge
+; CHECK-MVE-NEXT:    movge r1, #1
+; CHECK-MVE-NEXT:    cmp r1, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s1
+; CHECK-MVE-NEXT:    cset r1, ne
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    mov.w r2, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s2
+; CHECK-MVE-NEXT:    it ge
+; CHECK-MVE-NEXT:    movge r2, #1
+; CHECK-MVE-NEXT:    cmp r2, #0
+; CHECK-MVE-NEXT:    cset r2, ne
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    mov.w r3, #0
+; CHECK-MVE-NEXT:    vcmp.f32 s4, s3
+; CHECK-MVE-NEXT:    it ge
+; CHECK-MVE-NEXT:    movge r3, #1
+; CHECK-MVE-NEXT:    cmp r3, #0
+; CHECK-MVE-NEXT:    cset r3, ne
+; CHECK-MVE-NEXT:    movs r0, #0
+; CHECK-MVE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT:    it ge
+; CHECK-MVE-NEXT:    movge r0, #1
+; CHECK-MVE-NEXT:    cmp r0, #0
+; CHECK-MVE-NEXT:    cset r0, ne
+; CHECK-MVE-NEXT:    lsls r0, r0, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT:    lsls r0, r3, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT:    lsls r0, r2, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT:    lsls r0, r1, #31
+; CHECK-MVE-NEXT:    vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_oge_v4f32:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vmov r0, s4
+; CHECK-MVEFP-NEXT:    vcmp.f32 le, q0, r0
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %i = insertelement <4 x float> undef, float %src2, i32 0
+  %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+  %c = fcmp oge <4 x float> %sp, %src
+  %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %s
+}
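+
+; A note on these reversed forms (an observation drawn from the CHECK-MVEFP
+; lines in this file, not an authoritative statement of the lowering rules):
+; with the scalar splat on the left-hand side of the fcmp, the lowering keeps
+; the vector operand first and swaps the condition instead, for example:
+;
+;   %c = fcmp oge <4 x float> %sp, %src   ->   vcmp.f32 le, q0, r0
+;   %c = fcmp ogt <4 x float> %sp, %src   ->   vcmp.f32 lt, q0, r0
+;
+; while the symmetric oeq form lowers to a plain vcmp.f32 eq, q0, r0.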
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_olt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_olt_v4f32:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s2
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r3, #1
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: cset r3, ne
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT: lsls r0, r3, #31
+; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT: lsls r0, r2, #31
+; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT: lsls r0, r1, #31
+; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_olt_v4f32:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vmov r0, s4
+; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %c = fcmp olt <4 x float> %sp, %src
+ %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_ole_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ole_v4f32:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s2
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r3, #1
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: cset r3, ne
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT: lsls r0, r3, #31
+; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT: lsls r0, r2, #31
+; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT: lsls r0, r1, #31
+; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_ole_v4f32:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vmov r0, s4
+; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %c = fcmp ole <4 x float> %sp, %src
+ %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ueq_v4f32:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r1, #1
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r2, #1
+; CHECK-MVE-NEXT: vcmp.f32 s4, s2
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: mov.w r3, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r3, #1
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r3, #1
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: cset r3, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT: lsls r0, r3, #31
+; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT: lsls r0, r2, #31
+; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT: lsls r0, r1, #31
+; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_ueq_v4f32:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vmov r0, s4
+; CHECK-MVEFP-NEXT: vpt.f32 le, q0, r0
+; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %c = fcmp ueq <4 x float> %sp, %src
+ %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_une_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_une_v4f32:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s2
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r3, #1
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: cset r3, ne
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT: lsls r0, r3, #31
+; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT: lsls r0, r2, #31
+; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT: lsls r0, r1, #31
+; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_une_v4f32:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vmov r0, s4
+; CHECK-MVEFP-NEXT: vcmp.f32 ne, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %c = fcmp une <4 x float> %sp, %src
+ %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_ugt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ugt_v4f32:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s2
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r3, #1
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: cset r3, ne
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT: lsls r0, r3, #31
+; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT: lsls r0, r2, #31
+; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT: lsls r0, r1, #31
+; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_ugt_v4f32:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vmov r0, s4
+; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
+; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %c = fcmp ugt <4 x float> %sp, %src
+ %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_uge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_uge_v4f32:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s2
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r3, #1
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: cset r3, ne
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT: lsls r0, r3, #31
+; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT: lsls r0, r2, #31
+; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT: lsls r0, r1, #31
+; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_uge_v4f32:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vmov r0, s4
+; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
+; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %c = fcmp uge <4 x float> %sp, %src
+ %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_ult_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ult_v4f32:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it lt
+; CHECK-MVE-NEXT: movlt r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s2
+; CHECK-MVE-NEXT: it lt
+; CHECK-MVE-NEXT: movlt r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: it lt
+; CHECK-MVE-NEXT: movlt r3, #1
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: cset r3, ne
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it lt
+; CHECK-MVE-NEXT: movlt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT: lsls r0, r3, #31
+; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT: lsls r0, r2, #31
+; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT: lsls r0, r1, #31
+; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_ult_v4f32:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vmov r0, s4
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, r0
+; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %c = fcmp ult <4 x float> %sp, %src
+ %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_ule_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ule_v4f32:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it le
+; CHECK-MVE-NEXT: movle r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s2
+; CHECK-MVE-NEXT: it le
+; CHECK-MVE-NEXT: movle r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: it le
+; CHECK-MVE-NEXT: movle r3, #1
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: cset r3, ne
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it le
+; CHECK-MVE-NEXT: movle r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT: lsls r0, r3, #31
+; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT: lsls r0, r2, #31
+; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT: lsls r0, r1, #31
+; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_ule_v4f32:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vmov r0, s4
+; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, r0
+; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %c = fcmp ule <4 x float> %sp, %src
+ %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_ord_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ord_v4f32:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vc
+; CHECK-MVE-NEXT: movvc r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s2
+; CHECK-MVE-NEXT: it vc
+; CHECK-MVE-NEXT: movvc r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: it vc
+; CHECK-MVE-NEXT: movvc r3, #1
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: cset r3, ne
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vc
+; CHECK-MVE-NEXT: movvc r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT: lsls r0, r3, #31
+; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT: lsls r0, r2, #31
+; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT: lsls r0, r1, #31
+; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_ord_v4f32:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vmov r0, s4
+; CHECK-MVEFP-NEXT: vpt.f32 le, q0, r0
+; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, r0
+; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %c = fcmp ord <4 x float> %sp, %src
+ %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @vcmp_r_uno_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
+; CHECK-MVE-LABEL: vcmp_r_uno_v4f32:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vcmp.f32 s4, s0
+; CHECK-MVE-NEXT: movs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s1
+; CHECK-MVE-NEXT: cset r1, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s2
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r3, #0
+; CHECK-MVE-NEXT: vcmp.f32 s4, s3
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r3, #1
+; CHECK-MVE-NEXT: cmp r3, #0
+; CHECK-MVE-NEXT: cset r3, ne
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
+; CHECK-MVE-NEXT: lsls r0, r3, #31
+; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
+; CHECK-MVE-NEXT: lsls r0, r2, #31
+; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
+; CHECK-MVE-NEXT: lsls r0, r1, #31
+; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_uno_v4f32:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vmov r0, s4
+; CHECK-MVEFP-NEXT: vpt.f32 le, q0, r0
+; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %i = insertelement <4 x float> undef, float %src2, i32 0
+ %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
+ %c = fcmp uno <4 x float> %sp, %src
+ %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %s
+}
+
+
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_oeq_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+; CHECK-MVE-LABEL: vcmp_r_oeq_v8f16:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10}
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: vcmp.f16 s16, s12
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: movs r2, #0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s1
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s1
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s5
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s2
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s20, s10
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s2
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s6
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s3
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s2, s11
+; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r1, #1
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-MVE-NEXT: vmov r0, s0
+; CHECK-MVE-NEXT: vmov.16 q3[7], r0
+; CHECK-MVE-NEXT: vmov q0, q3
+; CHECK-MVE-NEXT: vpop {d8, d9, d10}
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_oeq_v8f16:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
+; CHECK-MVEFP-NEXT: vmov r0, s12
+; CHECK-MVEFP-NEXT: vcmp.f16 eq, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %src2 = load half, half* %src2p
+ %i = insertelement <8 x half> undef, half %src2, i32 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %c = fcmp oeq <8 x half> %sp, %src
+ %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_one_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+; CHECK-MVE-LABEL: vcmp_r_one_v8f16:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10}
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: vcmp.f16 s16, s12
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r2, #1
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vcmp.f16 s16, s1
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s1
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: vmovx.f16 s18, s5
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vcmp.f16 s16, s2
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s20, s10
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s2
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: vmovx.f16 s18, s6
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vcmp.f16 s16, s3
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s2, s11
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r1, #1
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-MVE-NEXT: vmov r0, s0
+; CHECK-MVE-NEXT: vmov.16 q3[7], r0
+; CHECK-MVE-NEXT: vmov q0, q3
+; CHECK-MVE-NEXT: vpop {d8, d9, d10}
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_one_v8f16:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
+; CHECK-MVEFP-NEXT: vmov r0, s12
+; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0
+; CHECK-MVEFP-NEXT: vcmpt.f16 ge, q0, r0
+; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %src2 = load half, half* %src2p
+ %i = insertelement <8 x half> undef, half %src2, i32 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %c = fcmp one <8 x half> %sp, %src
+ %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ogt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ogt_v8f16:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10}
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: vcmp.f16 s16, s12
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: movs r2, #0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s1
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s1
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s5
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s2
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s20, s10
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s2
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s6
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s3
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s2, s11
+; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it gt
+; CHECK-MVE-NEXT: movgt r1, #1
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-MVE-NEXT: vmov r0, s0
+; CHECK-MVE-NEXT: vmov.16 q3[7], r0
+; CHECK-MVE-NEXT: vmov q0, q3
+; CHECK-MVE-NEXT: vpop {d8, d9, d10}
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_ogt_v8f16:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
+; CHECK-MVEFP-NEXT: vmov r0, s12
+; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %src2 = load half, half* %src2p
+ %i = insertelement <8 x half> undef, half %src2, i32 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %c = fcmp ogt <8 x half> %sp, %src
+ %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_oge_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+; CHECK-MVE-LABEL: vcmp_r_oge_v8f16:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10}
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: vcmp.f16 s16, s12
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ge
+; CHECK-MVE-NEXT: movge r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: movs r2, #0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ge
+; CHECK-MVE-NEXT: movge r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s1
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ge
+; CHECK-MVE-NEXT: movge r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s1
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ge
+; CHECK-MVE-NEXT: movge r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s5
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s2
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s20, s10
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ge
+; CHECK-MVE-NEXT: movge r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s2
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ge
+; CHECK-MVE-NEXT: movge r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s6
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s3
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ge
+; CHECK-MVE-NEXT: movge r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s2, s11
+; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ge
+; CHECK-MVE-NEXT: movge r1, #1
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-MVE-NEXT: vmov r0, s0
+; CHECK-MVE-NEXT: vmov.16 q3[7], r0
+; CHECK-MVE-NEXT: vmov q0, q3
+; CHECK-MVE-NEXT: vpop {d8, d9, d10}
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_oge_v8f16:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
+; CHECK-MVEFP-NEXT: vmov r0, s12
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %src2 = load half, half* %src2p
+ %i = insertelement <8 x half> undef, half %src2, i32 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %c = fcmp oge <8 x half> %sp, %src
+ %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_olt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+; CHECK-MVE-LABEL: vcmp_r_olt_v8f16:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10}
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: vcmp.f16 s16, s12
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: movs r2, #0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s1
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s1
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s5
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s2
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s20, s10
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s2
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s6
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s3
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s2, s11
+; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it mi
+; CHECK-MVE-NEXT: movmi r1, #1
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-MVE-NEXT: vmov r0, s0
+; CHECK-MVE-NEXT: vmov.16 q3[7], r0
+; CHECK-MVE-NEXT: vmov q0, q3
+; CHECK-MVE-NEXT: vpop {d8, d9, d10}
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_olt_v8f16:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
+; CHECK-MVEFP-NEXT: vmov r0, s12
+; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %src2 = load half, half* %src2p
+ %i = insertelement <8 x half> undef, half %src2, i32 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %c = fcmp olt <8 x half> %sp, %src
+ %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ole_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ole_v8f16:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10}
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: vcmp.f16 s16, s12
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: movs r2, #0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s1
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s1
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s5
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s2
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s20, s10
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s2
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s6
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s3
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s2, s11
+; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ls
+; CHECK-MVE-NEXT: movls r1, #1
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-MVE-NEXT: vmov r0, s0
+; CHECK-MVE-NEXT: vmov.16 q3[7], r0
+; CHECK-MVE-NEXT: vmov q0, q3
+; CHECK-MVE-NEXT: vpop {d8, d9, d10}
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_ole_v8f16:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
+; CHECK-MVEFP-NEXT: vmov r0, s12
+; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %src2 = load half, half* %src2p
+ %i = insertelement <8 x half> undef, half %src2, i32 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %c = fcmp ole <8 x half> %sp, %src
+ %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ueq_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ueq_v8f16:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10}
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: vcmp.f16 s16, s12
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: mov.w r2, #0
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r2, #1
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vcmp.f16 s16, s1
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s1
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: vmovx.f16 s18, s5
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vcmp.f16 s16, s2
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s20, s10
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s2
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: vmovx.f16 s18, s6
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vcmp.f16 s16, s3
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s2, s11
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r0, #1
+; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it eq
+; CHECK-MVE-NEXT: moveq r1, #1
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r1, #1
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-MVE-NEXT: vmov r0, s0
+; CHECK-MVE-NEXT: vmov.16 q3[7], r0
+; CHECK-MVE-NEXT: vmov q0, q3
+; CHECK-MVE-NEXT: vpop {d8, d9, d10}
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_ueq_v8f16:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
+; CHECK-MVEFP-NEXT: vmov r0, s12
+; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0
+; CHECK-MVEFP-NEXT: vcmpt.f16 ge, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %src2 = load half, half* %src2p
+ %i = insertelement <8 x half> undef, half %src2, i32 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %c = fcmp ueq <8 x half> %sp, %src
+ %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+; CHECK-MVE-LABEL: vcmp_r_une_v8f16:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10}
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: vcmp.f16 s16, s12
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: movs r2, #0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s1
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s1
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s5
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s2
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s20, s10
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s2
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s6
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s3
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s2, s11
+; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it ne
+; CHECK-MVE-NEXT: movne r1, #1
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-MVE-NEXT: vmov r0, s0
+; CHECK-MVE-NEXT: vmov.16 q3[7], r0
+; CHECK-MVE-NEXT: vmov q0, q3
+; CHECK-MVE-NEXT: vpop {d8, d9, d10}
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_une_v8f16:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
+; CHECK-MVEFP-NEXT: vmov r0, s12
+; CHECK-MVEFP-NEXT: vcmp.f16 ne, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %src2 = load half, half* %src2p
+ %i = insertelement <8 x half> undef, half %src2, i32 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %c = fcmp une <8 x half> %sp, %src
+ %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ugt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ugt_v8f16:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10}
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: vcmp.f16 s16, s12
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: movs r2, #0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s1
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s1
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s5
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s2
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s20, s10
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s2
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s6
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s3
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s2, s11
+; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it hi
+; CHECK-MVE-NEXT: movhi r1, #1
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-MVE-NEXT: vmov r0, s0
+; CHECK-MVE-NEXT: vmov.16 q3[7], r0
+; CHECK-MVE-NEXT: vmov q0, q3
+; CHECK-MVE-NEXT: vpop {d8, d9, d10}
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_ugt_v8f16:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
+; CHECK-MVEFP-NEXT: vmov r0, s12
+; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
+; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %src2 = load half, half* %src2p
+ %i = insertelement <8 x half> undef, half %src2, i32 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %c = fcmp ugt <8 x half> %sp, %src
+ %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_uge_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+; CHECK-MVE-LABEL: vcmp_r_uge_v8f16:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10}
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: vcmp.f16 s16, s12
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: movs r2, #0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s1
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s1
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s5
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s2
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s20, s10
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s2
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s6
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s3
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: vmov.16 q3[5], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s0, s7
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s2, s11
+; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it pl
+; CHECK-MVE-NEXT: movpl r1, #1
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: cmp r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[6], r0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
+; CHECK-MVE-NEXT: vmov r0, s0
+; CHECK-MVE-NEXT: vmov.16 q3[7], r0
+; CHECK-MVE-NEXT: vmov q0, q3
+; CHECK-MVE-NEXT: vpop {d8, d9, d10}
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: vcmp_r_uge_v8f16:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
+; CHECK-MVEFP-NEXT: vmov r0, s12
+; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
+; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %src2 = load half, half* %src2p
+ %i = insertelement <8 x half> undef, half %src2, i32 0
+ %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
+ %c = fcmp uge <8 x half> %sp, %src
+ %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @vcmp_r_ult_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
+; CHECK-MVE-LABEL: vcmp_r_ult_v8f16:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
+; CHECK-MVE-NEXT: vpush {d8, d9, d10}
+; CHECK-MVE-NEXT: vldr.16 s16, [r0]
+; CHECK-MVE-NEXT: vmovx.f16 s12, s0
+; CHECK-MVE-NEXT: movs r0, #0
+; CHECK-MVE-NEXT: vmovx.f16 s14, s8
+; CHECK-MVE-NEXT: vcmp.f16 s16, s12
+; CHECK-MVE-NEXT: vmovx.f16 s12, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it lt
+; CHECK-MVE-NEXT: movlt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: vcmp.f16 s16, s0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: movs r2, #0
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vmovx.f16 s20, s9
+; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it lt
+; CHECK-MVE-NEXT: movlt r2, #1
+; CHECK-MVE-NEXT: cmp r2, #0
+; CHECK-MVE-NEXT: cset r2, ne
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: lsls r2, r2, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s1
+; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s12
+; CHECK-MVE-NEXT: vmovx.f16 s0, s3
+; CHECK-MVE-NEXT: vmov.16 q3[0], r2
+; CHECK-MVE-NEXT: mov.w r1, #0
+; CHECK-MVE-NEXT: vmov.16 q3[1], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it lt
+; CHECK-MVE-NEXT: movlt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s1
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[2], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it lt
+; CHECK-MVE-NEXT: movlt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: vmovx.f16 s18, s5
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vcmp.f16 s16, s2
+; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s20, s10
+; CHECK-MVE-NEXT: vmov.16 q3[3], r0
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it lt
+; CHECK-MVE-NEXT: movlt r0, #1
+; CHECK-MVE-NEXT: cmp r0, #0
+; CHECK-MVE-NEXT: cset r0, ne
+; CHECK-MVE-NEXT: lsls r0, r0, #31
+; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
+; CHECK-MVE-NEXT: vmov r0, s18
+; CHECK-MVE-NEXT: vmovx.f16 s18, s2
+; CHECK-MVE-NEXT: vcmp.f16 s16, s18
+; CHECK-MVE-NEXT: vmov.16 q3[4], r0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: mov.w r0, #0
+; CHECK-MVE-NEXT: it lt
+; CHECK-MVE-NEXT: movlt r0,
#1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s6 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vcmp.f16 s16, s3 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vcmp.f16 s16, s0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ult_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0 +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp ult <8 x half> %sp, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_ule_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_ule_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: vcmp.f16 s16, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, s0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s16, s1 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmov.16 q3[1], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, 
#1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s5 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vcmp.f16 s16, s2 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s6 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vcmp.f16 s16, s3 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vcmp.f16 s16, s0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ule_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0 +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp ule <8 x half> %sp, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_ord_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_ord_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: vcmp.f16 s16, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, s0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset 
r2, ne +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s16, s1 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmov.16 q3[1], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s5 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vcmp.f16 s16, s2 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s6 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vcmp.f16 s16, s3 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vcmp.f16 s16, s0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ord_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0 +; CHECK-MVEFP-NEXT: vcmpt.f16 gt, q0, r0 +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp ord <8 x half> %sp, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_uno_v8f16(<8 x half> %src, 
half* %src2p, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_uno_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9, d10} +; CHECK-MVE-NEXT: vpush {d8, d9, d10} +; CHECK-MVE-NEXT: vldr.16 s16, [r0] +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: vcmp.f16 s16, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, s0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: movs r2, #0 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmovx.f16 s20, s9 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s16, s1 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: vmov.16 q3[1], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, s18 +; CHECK-MVE-NEXT: vmov.16 q3[2], r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s5 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vcmp.f16 s16, s2 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmovx.f16 s20, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vmovx.f16 s18, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, s18 +; CHECK-MVE-NEXT: vmov.16 q3[4], r0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmovx.f16 s18, s6 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vcmp.f16 s16, s3 +; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: vcmp.f16 s16, s0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: vmov r0, s18 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov.16 q3[6], r0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls 
r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9, d10} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_uno_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vldr.16 s12, [r0] +; CHECK-MVEFP-NEXT: vmov r0, s12 +; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0 +; CHECK-MVEFP-NEXT: vcmpt.f16 gt, q0, r0 +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %src2 = load half, half* %src2p + %i = insertelement <8 x half> undef, half %src2, i32 0 + %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %c = fcmp uno <8 x half> %sp, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll index 6aae7e7665a10..82ef5df349aaf 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll @@ -107,9 +107,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> ; ; CHECK-MVEFP-LABEL: vcmp_one_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0 -; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0 -; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q3 +; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -380,9 +379,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float> ; ; CHECK-MVEFP-LABEL: vcmp_ueq_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0 -; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0 -; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q3 +; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -698,9 +696,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float> ; ; CHECK-MVEFP-LABEL: vcmp_ord_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0 -; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0 -; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q3 +; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -753,9 +750,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, <4 x float> ; ; CHECK-MVEFP-LABEL: vcmp_uno_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0 -; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0 -; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q3 +; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -1013,9 +1009,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_one_v8f16(<8 x half> %src, <8 x half> %a ; ; CHECK-MVEFP-LABEL: vcmp_one_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0 -; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0 -; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3 +; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -1632,9 +1627,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ueq_v8f16(<8 x half> %src, <8 x half> %a ; ; CHECK-MVEFP-LABEL: vcmp_ueq_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0 -; 
CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0 -; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3 +; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -2358,9 +2352,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %a ; ; CHECK-MVEFP-LABEL: vcmp_ord_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0 -; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0 -; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3 +; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr ; CHECK-MVEFP-NEXT: vpnot ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr @@ -2481,9 +2474,8 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %a ; ; CHECK-MVEFP-LABEL: vcmp_uno_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0 -; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0 -; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3 +; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -2491,3 +2483,2488 @@ entry: %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b ret <8 x half> %s } + + +; Reversed + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_oeq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_oeq_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_oeq_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f32 eq, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp oeq <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_one_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; 
CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r2, #1 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r3, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_one_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp one <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_ogt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_ogt_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ogt_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ogt <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_oge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_oge_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_oge_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp oge <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_olt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_olt_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_olt_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp olt <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_ole_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_ole_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; 
CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ole_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ole <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_ueq_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r2, #1 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r3, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ueq_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ueq <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> 
@vcmp_r_une_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_une_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_une_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f32 ne, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp une <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_ugt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_ugt_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ugt_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ugt <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_uge_v4f32(<4 x float> %src, <4 x float> %a, 
<4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_uge_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_uge_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp uge <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_ult_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_ult_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ult_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ult <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_ule_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; 
CHECK-MVE-LABEL: vcmp_r_ule_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ule_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ule <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_ord_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { +; CHECK-MVE-LABEL: vcmp_r_ord_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, s1 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, s2 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, s3 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ord_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ord <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + +define arm_aapcs_vfpcc <4 x float> @vcmp_r_uno_v4f32(<4 x float> %src, <4 x float> %a, <4 x 
float> %b) { +; CHECK-MVE-LABEL: vcmp_r_uno_v4f32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcmp.f32 s0, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, s1 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, s2 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, s3 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r3, #1 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: cset r3, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: lsls r0, r3, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 +; CHECK-MVE-NEXT: lsls r0, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 +; CHECK-MVE-NEXT: lsls r0, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_uno_v4f32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp uno <4 x float> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %s +} + + + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_oeq_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_oeq_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: 
vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_oeq_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f16 eq, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp oeq <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_one_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_one_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r2, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; 
CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_one_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp one <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_ogt_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_ogt_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, 
s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it mi +; CHECK-MVE-NEXT: movmi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ogt_v8f16: +; CHECK-MVEFP: @ 
%bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ogt <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_oge_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_oge_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, 
s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ls +; CHECK-MVE-NEXT: movls r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_oge_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp oge <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_olt_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_olt_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls 
r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it gt +; CHECK-MVE-NEXT: movgt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_olt_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp olt <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_ole_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_ole_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; 
CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ge +; CHECK-MVE-NEXT: movge r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ole_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ole <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_ueq_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_ueq_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r2, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: mov.w r0, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: 
vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r1, #1 +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it eq +; CHECK-MVE-NEXT: moveq r0, #1 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ueq_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ueq <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_une_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: 
mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it ne +; CHECK-MVE-NEXT: movne r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_une_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f16 ne, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp une <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_ugt_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: 
vcmp_r_ugt_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it lt +; CHECK-MVE-NEXT: movlt r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 
+; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ugt_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ugt <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_uge_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_uge_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r1, #1 +; CHECK-MVE-NEXT: cmp r1, 
#0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it le +; CHECK-MVE-NEXT: movle r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_uge_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp uge <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_ult_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_ult_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; 
CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it hi +; CHECK-MVE-NEXT: movhi r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ult_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ult <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_ule_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_ule_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it pl +; CHECK-MVE-NEXT: movpl r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ule_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ule <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_ord_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_ord_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, s0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r2, #1 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, s1 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; 
CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, s16 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, s2 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, s16 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, s3 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vc +; CHECK-MVE-NEXT: movvc r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_ord_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr +; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp ord <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} + +define arm_aapcs_vfpcc <8 x half> @vcmp_r_uno_v8f16(<8 x half> %src, <8 x half> %a, <8 x half> %b) { +; CHECK-MVE-LABEL: vcmp_r_uno_v8f16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: vmovx.f16 s12, s0 +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, s12 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, s0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: mov.w r2, #0 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #1 +; 
CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: cset r2, ne +; CHECK-MVE-NEXT: vmov r1, s12 +; CHECK-MVE-NEXT: lsls r2, r2, #31 +; CHECK-MVE-NEXT: vcmp.f16 s1, s1 +; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s9 +; CHECK-MVE-NEXT: vmov.16 q3[0], r2 +; CHECK-MVE-NEXT: vmovx.f16 s0, s3 +; CHECK-MVE-NEXT: vmov.16 q3[1], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: movs r0, #0 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s1 +; CHECK-MVE-NEXT: vcmp.f16 s16, s16 +; CHECK-MVE-NEXT: vmov.16 q3[2], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s5 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s2, s2 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s18, s10 +; CHECK-MVE-NEXT: vmov.16 q3[3], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vmovx.f16 s16, s2 +; CHECK-MVE-NEXT: vcmp.f16 s16, s16 +; CHECK-MVE-NEXT: vmov.16 q3[4], r1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s16, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vcmp.f16 s3, s3 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[5], r1 +; CHECK-MVE-NEXT: mov.w r1, #0 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #1 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: cset r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vmovx.f16 s2, s11 +; CHECK-MVE-NEXT: vseleq.f16 s16, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #1 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cset r0, ne +; CHECK-MVE-NEXT: vmov r1, s16 +; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.16 q3[6], r1 +; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q3[7], r0 +; CHECK-MVE-NEXT: vmov q0, q3 +; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: vcmp_r_uno_v8f16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr +; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr +; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: bx lr +entry: + %c = fcmp uno <8 x half> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %s +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll index a6fc2dbe4a4c8..c832c241e67cc 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll @@ -591,3 +591,596 @@ define 
arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < %a11 = select <2 x i1> %a10, <2 x i32> %c, <2 x i32> %a5 ret <2 x i32> %a11 } + +; Reversed + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_eq_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_eq_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i32 eq, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp eq <4 x i32> %sp, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ne_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_ne_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i32 ne, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp ne <4 x i32> %sp, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_sgt_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_sgt_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s32 lt, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp sgt <4 x i32> %sp, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_sge_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_sge_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s32 le, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp sge <4 x i32> %sp, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_slt_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_slt_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s32 gt, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp slt <4 x i32> %sp, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_sle_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_sle_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s32 ge, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp sle <4 x i32> %sp, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ugt_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_ugt_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q3, r0 +; CHECK-NEXT: vcmp.u32 hi, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i 
= insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp ugt <4 x i32> %sp, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_uge_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_uge_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q3, r0 +; CHECK-NEXT: vcmp.u32 cs, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp uge <4 x i32> %sp, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ult_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_ult_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u32 hi, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp ult <4 x i32> %sp, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ule_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_ule_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u32 cs, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %c = icmp ule <4 x i32> %sp, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_eq_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_eq_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i16 eq, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp eq <8 x i16> %sp, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ne_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_ne_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i16 ne, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp ne <8 x i16> %sp, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_sgt_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_sgt_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s16 lt, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp sgt <8 x i16> %sp, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_sge_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_sge_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s16 
le, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp sge <8 x i16> %sp, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_slt_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_slt_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s16 gt, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp slt <8 x i16> %sp, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_sle_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_sle_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s16 ge, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp sle <8 x i16> %sp, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ugt_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_ugt_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q3, r0 +; CHECK-NEXT: vcmp.u16 hi, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp ugt <8 x i16> %sp, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_uge_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_uge_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q3, r0 +; CHECK-NEXT: vcmp.u16 cs, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp uge <8 x i16> %sp, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ult_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_ult_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u16 hi, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp ult <8 x i16> %sp, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ule_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_ule_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u16 cs, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %c = icmp ule <8 x i16> %sp, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_eq_v16i8(<16 x i8> %src, i8 %src2, <16 x 
i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_eq_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i8 eq, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp eq <16 x i8> %sp, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ne_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_ne_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i8 ne, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp ne <16 x i8> %sp, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_sgt_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_sgt_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s8 lt, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp sgt <16 x i8> %sp, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_sge_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_sge_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s8 le, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp sge <16 x i8> %sp, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_slt_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_slt_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s8 gt, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp slt <16 x i8> %sp, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_sle_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_sle_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s8 ge, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp sle <16 x i8> %sp, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ugt_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_ugt_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.8 q3, r0 +; CHECK-NEXT: vcmp.u8 hi, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp ugt <16 x i8> %sp, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define 
arm_aapcs_vfpcc <16 x i8> @vcmp_r_uge_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_uge_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.8 q3, r0 +; CHECK-NEXT: vcmp.u8 cs, q3, q0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp uge <16 x i8> %sp, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ult_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_ult_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u8 hi, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp ult <16 x i8> %sp, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ule_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_ule_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u8 cs, q0, r0 +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %c = icmp ule <16 x i8> %sp, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + + +define arm_aapcs_vfpcc <2 x i64> @vcmp_r_eq_v2i64(<2 x i64> %src, i64 %src2, <2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: vcmp_r_eq_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: eors r2, r1 +; CHECK-NEXT: eors r3, r0 +; CHECK-NEXT: orrs r2, r3 +; CHECK-NEXT: cset r2, eq +; CHECK-NEXT: tst.w r2, #1 +; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov r2, s3 +; CHECK-NEXT: eors r1, r2 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: eors r0, r2 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vbic q0, q2, q3 +; CHECK-NEXT: vand q1, q1, q3 +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <2 x i64> undef, i64 %src2, i32 0 + %sp = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer + %c = icmp eq <2 x i64> %sp, %src + %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %s +} + +define arm_aapcs_vfpcc <2 x i32> @vcmp_r_eq_v2i32(<2 x i64> %src, i64 %src2, <2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: vcmp_r_eq_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: eors r2, r1 +; CHECK-NEXT: eors r3, r0 +; CHECK-NEXT: orrs r2, r3 +; CHECK-NEXT: cset r2, eq +; CHECK-NEXT: tst.w r2, #1 +; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: vmov.32 q3[0], r2 +; CHECK-NEXT: vmov.32 q3[1], r2 +; CHECK-NEXT: vmov r2, s3 +; CHECK-NEXT: eors r1, r2 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: eors r0, r2 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vbic q0, q2, q3 +; CHECK-NEXT: vand q1, q1, q3 +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <2 
x i64> undef, i64 %src2, i32 0 + %sp = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer + %c = icmp eq <2 x i64> %sp, %src + %s = select <2 x i1> %c, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %s +} + +define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK-LABEL: vcmp_r_multi_v2i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: vmov r2, s8 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vbic q0, q2, q3 +; CHECK-NEXT: vmov lr, s0 +; CHECK-NEXT: subs.w r1, lr, r2 +; CHECK-NEXT: asr.w r12, lr, #31 +; CHECK-NEXT: sbcs.w r1, r12, r2, asr #31 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: vmov.32 q3[0], r1 +; CHECK-NEXT: vmov.32 q3[1], r1 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: subs r0, r1, r2 +; CHECK-NEXT: asr.w r12, r1, #31 +; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: cmp.w lr, #0 +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov.32 q5[0], r0 +; CHECK-NEXT: vmov.32 q5[1], r0 +; CHECK-NEXT: vmov r0, s6 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov.32 q5[2], r0 +; CHECK-NEXT: vmov.32 q5[3], r0 +; CHECK-NEXT: vand q1, q5, q4 +; CHECK-NEXT: vand q1, q3, q1 +; CHECK-NEXT: vbic q0, q0, q1 +; CHECK-NEXT: vand q1, q2, q1 +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} + %a4 = icmp eq <2 x i64> %a, zeroinitializer + %a5 = select <2 x i1> %a4, <2 x i32> zeroinitializer, <2 x i32> %c + %a6 = icmp ne <2 x i32> %b, zeroinitializer + %a7 = icmp slt <2 x i32> %a5, %c + %a8 = icmp ne <2 x i32> %a5, zeroinitializer + %a9 = and <2 x i1> %a6, %a8 + %a10 = and <2 x i1> %a7, %a9 + %a11 = select <2 x i1> %a10, <2 x i32> %c, <2 x i32> %a5 + ret <2 x i32> %a11 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll index 142511b10d6af..6d08abc723021 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll @@ -415,3 +415,421 @@ entry: %s = select <2 x i1> %c, <2 x i32> %a, <2 x i32> %b ret <2 x i32> %s } + + +; Reversed + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_eqz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: 
vcmp_r_eqz_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i32 eq, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <4 x i32> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_nez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_nez_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i32 ne, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp ne <4 x i32> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_sgtz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_sgtz_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s32 lt, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp sgt <4 x i32> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_sgez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_sgez_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s32 le, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp sge <4 x i32> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_sltz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_sltz_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s32 gt, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp slt <4 x i32> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_slez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_slez_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s32 ge, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp sle <4 x i32> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ugtz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_ugtz_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp ugt <4 x i32> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ugez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_ugez_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u32 cs, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp uge <4 x i32> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ultz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_ultz_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i32 ne, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp ult <4 x i32> zeroinitializer, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ulez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vcmp_r_ulez_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %c = icmp ule <4 x i32> 
zeroinitializer, %src + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_eqz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_eqz_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i16 eq, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <8 x i16> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_nez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_nez_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i16 ne, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp ne <8 x i16> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_sgtz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_sgtz_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s16 lt, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp sgt <8 x i16> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_sgez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_sgez_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s16 le, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp sge <8 x i16> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_sltz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_sltz_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s16 gt, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp slt <8 x i16> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_slez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_slez_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s16 ge, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp sle <8 x i16> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ugtz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_ugtz_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp ugt <8 x i16> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ugez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_ugez_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u16 cs, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp uge <8 x i16> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ultz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_ultz_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i16 ne, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp ult <8 x i16> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <8 x i16> 
@vcmp_r_ulez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vcmp_r_ulez_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %c = icmp ule <8 x i16> zeroinitializer, %src + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %s +} + + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_eqz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_eqz_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i8 eq, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <16 x i8> zeroinitializer, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_nez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_nez_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i8 ne, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp ne <16 x i8> zeroinitializer, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_sgtz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_sgtz_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s8 lt, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp sgt <16 x i8> zeroinitializer, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_sgez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_sgez_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s8 le, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp sge <16 x i8> zeroinitializer, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_sltz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_sltz_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s8 gt, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp slt <16 x i8> zeroinitializer, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_slez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_slez_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.s8 ge, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp sle <16 x i8> zeroinitializer, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ugtz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_ugtz_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp ugt <16 x i8> zeroinitializer, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ugez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_ugez_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.u8 cs, q0, zr +; CHECK-NEXT: vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp uge <16 x i8> zeroinitializer, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ultz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_ultz_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcmp.i8 ne, q0, zr +; CHECK-NEXT: 
vpsel q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %c = icmp ult <16 x i8> zeroinitializer, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ulez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vcmp_r_ulez_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %c = icmp ule <16 x i8> zeroinitializer, %src + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %s +} + + +define arm_aapcs_vfpcc <2 x i64> @vcmp_r_eqz_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: vcmp_r_eqz_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vbic q0, q2, q3 +; CHECK-NEXT: vand q1, q1, q3 +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <2 x i64> zeroinitializer, %src + %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %s +} + +define arm_aapcs_vfpcc <2 x i32> @vcmp_r_eqz_v2i32(<2 x i64> %src, <2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: vcmp_r_eqz_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov.32 q3[0], r0 +; CHECK-NEXT: vmov.32 q3[1], r0 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: tst.w r0, #1 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.32 q3[3], r0 +; CHECK-NEXT: vbic q0, q2, q3 +; CHECK-NEXT: vand q1, q1, q3 +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <2 x i64> %src, zeroinitializer + %s = select <2 x i1> %c, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %s +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vctp.ll b/llvm/test/CodeGen/Thumb2/mve-vctp.ll index 8f7e1696e6790..d6e4d492f5351 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vctp.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vctp.ll @@ -10,7 +10,7 @@ define void @vctp8(i32 %arg, <16 x i8> *%in, <16 x i8>* %out) { ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r2] ; CHECK-NEXT: bx lr - %pred = call <16 x i1> @llvm.arm.vctp8(i32 %arg) + %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %arg) %ld = load <16 x i8>, <16 x i8>* %in %res = select <16 x i1> %pred, <16 x i8> %ld, <16 x i8> zeroinitializer store <16 x i8> %res, <16 x i8>* %out @@ -26,7 +26,7 @@ define void @vctp16(i32 %arg, <8 x i16> *%in, <8 x i16>* %out) { ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r2] ; CHECK-NEXT: bx lr - %pred = call <8 x i1> @llvm.arm.vctp16(i32 %arg) + %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %arg) %ld = load <8 x i16>, <8 x i16>* %in %res = select <8 x i1> %pred, <8 x i16> %ld, <8 x i16> zeroinitializer store <8 x i16> %res, <8 x i16>* %out @@ -42,13 +42,13 @@ define void @vctp32(i32 %arg, <4 x i32> *%in, <4 x i32>* %out) { ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r2] ; CHECK-NEXT: bx lr - %pred = call <4 x i1> @llvm.arm.vctp32(i32 %arg) + %pred = call <4 x i1> 
@llvm.arm.mve.vctp32(i32 %arg) %ld = load <4 x i32>, <4 x i32>* %in %res = select <4 x i1> %pred, <4 x i32> %ld, <4 x i32> zeroinitializer store <4 x i32> %res, <4 x i32>* %out ret void } -declare <16 x i1> @llvm.arm.vctp8(i32) -declare <8 x i1> @llvm.arm.vctp16(i32) -declare <4 x i1> @llvm.arm.vctp32(i32) +declare <16 x i1> @llvm.arm.mve.vctp8(i32) +declare <8 x i1> @llvm.arm.mve.vctp16(i32) +declare <4 x i1> @llvm.arm.mve.vctp32(i32) diff --git a/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll b/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll index 19979f203f16b..83534e2c3e833 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll @@ -4,7 +4,8 @@ define arm_aapcs_vfpcc <16 x i8> @add_ashr_v16i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: add_ashr_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.s8 q0, q0, q1 +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.s8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <16 x i8> %src1, %src2 @@ -15,7 +16,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @add_ashr_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: add_ashr_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.s16 q0, q0, q1 +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.s16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <8 x i16> %src1, %src2 @@ -26,7 +28,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @add_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: add_ashr_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.s32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.s32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add nsw <4 x i32> %src1, %src2 @@ -37,7 +40,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @add_lshr_v16i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: add_lshr_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u8 q0, q0, q1 +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.u8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <16 x i8> %src1, %src2 @@ -48,7 +52,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @add_lshr_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: add_lshr_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u16 q0, q0, q1 +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.u16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <8 x i16> %src1, %src2 @@ -59,7 +64,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @add_lshr_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: add_lshr_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.u32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add nsw <4 x i32> %src1, %src2 @@ -70,7 +76,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @sub_ashr_v16i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: sub_ashr_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.s8 q0, q0, q1 +; CHECK-NEXT: vsub.i8 q0, q0, q1 +; CHECK-NEXT: vshr.s8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <16 x i8> %src1, %src2 @@ -81,7 +88,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @sub_ashr_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: sub_ashr_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.s16 q0, q0, q1 +; CHECK-NEXT: vsub.i16 q0, q0, q1 +; CHECK-NEXT: vshr.s16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <8 x i16> %src1, %src2 @@ -92,7 +100,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @sub_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: sub_ashr_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.s32 q0, q0, q1 +; CHECK-NEXT: 
vsub.i32 q0, q0, q1 +; CHECK-NEXT: vshr.s32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub nsw <4 x i32> %src1, %src2 @@ -103,7 +112,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @sub_lshr_v16i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: sub_lshr_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u8 q0, q0, q1 +; CHECK-NEXT: vsub.i8 q0, q0, q1 +; CHECK-NEXT: vshr.u8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <16 x i8> %src1, %src2 @@ -114,7 +124,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @sub_lshr_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: sub_lshr_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u16 q0, q0, q1 +; CHECK-NEXT: vsub.i16 q0, q0, q1 +; CHECK-NEXT: vshr.u16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <8 x i16> %src1, %src2 @@ -125,7 +136,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @sub_lshr_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: sub_lshr_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u32 q0, q0, q1 +; CHECK-NEXT: vsub.i32 q0, q0, q1 +; CHECK-NEXT: vshr.u32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub nsw <4 x i32> %src1, %src2 @@ -140,7 +152,8 @@ define arm_aapcs_vfpcc <16 x i8> @add_sdiv_v16i8(<16 x i8> %src1, <16 x i8> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vadd.i8 q0, q0, q1 ; CHECK-NEXT: vshr.u8 q1, q0, #7 -; CHECK-NEXT: vhadd.s8 q0, q0, q1 +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.s8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <16 x i8> %src1, %src2 @@ -153,7 +166,8 @@ define arm_aapcs_vfpcc <8 x i16> @add_sdiv_v8i16(<8 x i16> %src1, <8 x i16> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vadd.i16 q0, q0, q1 ; CHECK-NEXT: vshr.u16 q1, q0, #15 -; CHECK-NEXT: vhadd.s16 q0, q0, q1 +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.s16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <8 x i16> %src1, %src2 @@ -166,7 +180,8 @@ define arm_aapcs_vfpcc <4 x i32> @add_sdiv_v4i32(<4 x i32> %src1, <4 x i32> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: vshr.u32 q1, q0, #31 -; CHECK-NEXT: vhadd.s32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.s32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add nsw <4 x i32> %src1, %src2 @@ -177,7 +192,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @add_udiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: add_udiv_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u8 q0, q0, q1 +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.u8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <16 x i8> %src1, %src2 @@ -188,7 +204,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @add_udiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: add_udiv_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u16 q0, q0, q1 +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.u16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <8 x i16> %src1, %src2 @@ -199,7 +216,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @add_udiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: add_udiv_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.u32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add nsw <4 x i32> %src1, %src2 @@ -212,7 +230,8 @@ define arm_aapcs_vfpcc <16 x i8> @sub_sdiv_v16i8(<16 x i8> %src1, <16 x i8> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vsub.i8 q0, q0, q1 ; CHECK-NEXT: vshr.u8 q1, q0, #7 -; CHECK-NEXT: vhadd.s8 q0, q0, q1 +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.s8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <16 x 
i8> %src1, %src2 @@ -225,7 +244,8 @@ define arm_aapcs_vfpcc <8 x i16> @sub_sdiv_v8i16(<8 x i16> %src1, <8 x i16> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vsub.i16 q0, q0, q1 ; CHECK-NEXT: vshr.u16 q1, q0, #15 -; CHECK-NEXT: vhadd.s16 q0, q0, q1 +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.s16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <8 x i16> %src1, %src2 @@ -238,7 +258,8 @@ define arm_aapcs_vfpcc <4 x i32> @sub_sdiv_v4i32(<4 x i32> %src1, <4 x i32> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vsub.i32 q0, q0, q1 ; CHECK-NEXT: vshr.u32 q1, q0, #31 -; CHECK-NEXT: vhadd.s32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.s32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub nsw <4 x i32> %src1, %src2 @@ -249,7 +270,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @sub_udiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: sub_udiv_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u8 q0, q0, q1 +; CHECK-NEXT: vsub.i8 q0, q0, q1 +; CHECK-NEXT: vshr.u8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <16 x i8> %src1, %src2 @@ -260,7 +282,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @sub_udiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: sub_udiv_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u16 q0, q0, q1 +; CHECK-NEXT: vsub.i16 q0, q0, q1 +; CHECK-NEXT: vshr.u16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <8 x i16> %src1, %src2 @@ -271,7 +294,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @sub_udiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: sub_udiv_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u32 q0, q0, q1 +; CHECK-NEXT: vsub.i32 q0, q0, q1 +; CHECK-NEXT: vshr.u32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub nsw <4 x i32> %src1, %src2 diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-from-intrinsics.ll b/llvm/test/CodeGen/Thumb2/mve-vpt-from-intrinsics.ll index c7533503fa777..e6e7de6109431 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vpt-from-intrinsics.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-from-intrinsics.ll @@ -1,5 +1,7 @@ ; RUN: opt -instcombine %s | llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve --verify-machineinstrs -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + define arm_aapcs_vfpcc <8 x i16> @test_vpt_block(<8 x i16> %v_inactive, <8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { ; CHECK-LABEL: test_vpt_block: ; CHECK: @ %bb.0: @ %entry @@ -16,7 +18,27 @@ entry: ret <8 x i16> %5 } +define arm_aapcs_vfpcc <8 x i16> @test_vpnot(<8 x i16> %v, <8 x i16> %w, <8 x i16> %x, i32 %n) { +; CHECK-LABEL: test_vpnot: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vctp.16 r0 +; CHECK-NEXT: vpnot +; CHECK-NEXT: vpst +; CHECK-NEXT: vaddt.i16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) + %1 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %0) + %2 = trunc i32 %1 to i16 + %3 = xor i16 %2, -1 + %4 = zext i16 %3 to i32 + %5 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %4) + %6 = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %w, <8 x i16> %x, <8 x i1> %5, <8 x i16> %v) + ret <8 x i16> %6 +} + declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>) declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) declare <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) +declare <8 x i1> @llvm.arm.mve.vctp16(i32) diff --git a/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll index c24845edbddb1..849cf0d4ce4cf 100644 --- 
a/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll @@ -165,7 +165,7 @@ define i1 @test_ashr_i1_imm1(i32 %arg1) { ; X64-LABEL: test_ashr_i1_imm1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: movb $-1, %cl +; X64-NEXT: movb $1, %cl ; X64-NEXT: shlb $7, %al ; X64-NEXT: sarb $7, %al ; X64-NEXT: andb $1, %cl diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-constant.mir index 3b4bec6978f74..b89116e3a6cf7 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-constant.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-constant.mir @@ -18,7 +18,7 @@ registers: body: | bb.1 (%ir-block.0): ; X32-LABEL: name: test_constant - ; X32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; X32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X32: $eax = COPY [[C]](s32) ; X32: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 8 ; X32: $al = COPY [[C1]](s8) @@ -32,7 +32,7 @@ body: | ; X32: $rax = COPY [[MV]](s64) ; X32: RET 0 ; X64-LABEL: name: test_constant - ; X64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; X64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X64: $eax = COPY [[C]](s32) ; X64: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 8 ; X64: $al = COPY [[C1]](s8) diff --git a/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll index e935c1ca04bbb..5dd53751247f7 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll @@ -164,7 +164,7 @@ define i1 @test_lshr_i1_imm1(i32 %arg1) { ; X64-LABEL: test_lshr_i1_imm1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: movb $-1, %cl +; X64-NEXT: movb $1, %cl ; X64-NEXT: andb $1, %al ; X64-NEXT: andb $1, %cl ; X64-NEXT: shrb %cl, %al diff --git a/llvm/test/CodeGen/X86/GlobalISel/shl-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/shl-scalar.ll index 49aa99e01c6ce..5ccc0eee59512 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/shl-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/shl-scalar.ll @@ -162,7 +162,7 @@ define i1 @test_shl_i1_imm1(i32 %arg1) { ; X64-LABEL: test_shl_i1_imm1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: movb $-1, %cl +; X64-NEXT: movb $1, %cl ; X64-NEXT: andb $1, %cl ; X64-NEXT: shlb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll index 6f7247388640a..e6b43c07fe056 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt.ll @@ -25,25 +25,25 @@ define <8 x double> @sltof864(<8 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; NODQ-NEXT: vmovq %xmm1, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: 
vcvtsi2sd %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; NODQ-NEXT: retq @@ -69,12 +69,12 @@ define <4 x double> @slto4f64(<4 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; NODQ-NEXT: vmovq %xmm1, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; NODQ-NEXT: retq ; @@ -100,7 +100,7 @@ define <2 x double> @slto2f64(<2 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; NODQ-NEXT: retq ; ; VLDQ-LABEL: slto2f64: @@ -140,7 +140,7 @@ define <2 x float> @sltof2f32(<2 x i64> %a) { ; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; VLNODQ-NEXT: vmovq %xmm0, %rax ; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; VLNODQ-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; VLNODQ-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; VLNODQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; VLNODQ-NEXT: retq ; @@ -1040,13 +1040,13 @@ define <16 x float> @slto16f32(<16 x i64> %a) { ; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1 ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 +; NODQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2 ; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] -; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm3 +; NODQ-NEXT: vextractf32x4 $3, %zmm0, %xmm3 ; NODQ-NEXT: vmovq %xmm3, %rax ; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 ; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] @@ -1094,25 +1094,25 @@ define <8 x double> @slto8f64(<8 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; NODQ-NEXT: vmovq %xmm1, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, 
%xmm4, %xmm0 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; NODQ-NEXT: retq @@ -1138,25 +1138,25 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NODQ-NEXT: vpextrq $1, %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4 ; NODQ-NEXT: vmovq %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm3 ; NODQ-NEXT: vpextrq $1, %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 ; NODQ-NEXT: vmovq %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 ; NODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2 @@ -1164,25 +1164,25 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; NODQ-NEXT: vpextrq $1, %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 ; NODQ-NEXT: vmovq %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; NODQ-NEXT: vextracti128 $1, %ymm1, %xmm3 ; NODQ-NEXT: vpextrq $1, %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 ; NODQ-NEXT: vmovq %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; NODQ-NEXT: vpextrq $1, %xmm1, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 ; NODQ-NEXT: vmovq %xmm1, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0] ; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1 ; NODQ-NEXT: retq @@ -1275,13 +1275,13 @@ define <16 x float> @ulto16f32(<16 x i64> %a) { ; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm1 ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 +; NODQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm2 ; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] -; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm3 +; NODQ-NEXT: vextractf32x4 
$3, %zmm0, %xmm3 ; NODQ-NEXT: vmovq %xmm3, %rax ; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4 ; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index acc4b7e138118..258cc2031ae8b 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -358,11 +358,11 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) { ; CHECK: %loop.header ; CHECK: %loop.body1 ; CHECK: %loop.body2 +; CHECK: %loop.body3 +; CHECK: %loop.inner1.begin ; CHECK: %loop.body4 ; CHECK: %loop.inner2.begin ; CHECK: %loop.inner2.begin -; CHECK: %loop.body3 -; CHECK: %loop.inner1.begin ; CHECK: %bail entry: diff --git a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll index 2e39fb976c752..aca23b032708d 100644 --- a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll +++ b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-apple-macosx10.10.0 < %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -12,9 +13,8 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" define double @mag_pos0_double(double %x) nounwind { ; CHECK-LABEL: mag_pos0_double: ; CHECK: ## %bb.0: -; CHECK-NEXT: andps [[SIGNMASK1]](%rip), %xmm0 +; CHECK-NEXT: andps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq -; %y = call double @copysign(double 0.0, double %x) ret double %y } @@ -25,10 +25,9 @@ define double @mag_pos0_double(double %x) nounwind { define double @mag_neg0_double(double %x) nounwind { ; CHECK-LABEL: mag_neg0_double: ; CHECK: ## %bb.0: -; CHECK-NEXT: movsd [[SIGNMASK2]](%rip), %xmm1 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq -; %y = call double @copysign(double -0.0, double %x) ret double %y } @@ -42,11 +41,10 @@ define double @mag_neg0_double(double %x) nounwind { define double @mag_pos1_double(double %x) nounwind { ; CHECK-LABEL: mag_pos1_double: ; CHECK: ## %bb.0: -; CHECK-NEXT: andps [[SIGNMASK3]](%rip), %xmm0 -; CHECK-NEXT: movsd [[ONE3]](%rip), %xmm1 +; CHECK-NEXT: andps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: orps %xmm1, %xmm0 ; CHECK-NEXT: retq -; %y = call double @copysign(double 1.0, double %x) ret double %y } @@ -61,10 +59,9 @@ define double @mag_pos1_double(double %x) nounwind { define double @mag_neg1_double(double %x) nounwind { ; CHECK-LABEL: mag_neg1_double: ; CHECK: ## %bb.0: -; CHECK-NEXT: andps [[SIGNMASK4]](%rip), %xmm0 -; CHECK-NEXT: orps [[ONE4]](%rip), %xmm0 +; CHECK-NEXT: andps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: orps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq -; %y = call double @copysign(double -1.0, double %x) ret double %y } @@ -78,9 +75,8 @@ define double @mag_neg1_double(double %x) nounwind { define float @mag_pos0_float(float %x) nounwind { ; CHECK-LABEL: mag_pos0_float: ; CHECK: ## %bb.0: -; CHECK-NEXT: andps [[SIGNMASK5]](%rip), %xmm0 +; CHECK-NEXT: andps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq -; %y = call float @copysignf(float 0.0, float %x) ret float %y } @@ -91,10 +87,9 @@ define float @mag_pos0_float(float %x) nounwind { define float @mag_neg0_float(float %x) nounwind { ; CHECK-LABEL: mag_neg0_float: ; CHECK: ## %bb.0: -; CHECK-NEXT: movss [[SIGNMASK6]](%rip), %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: andps %xmm1, %xmm0 ; 
CHECK-NEXT: retq -; %y = call float @copysignf(float -0.0, float %x) ret float %y } @@ -110,11 +105,10 @@ define float @mag_neg0_float(float %x) nounwind { define float @mag_pos1_float(float %x) nounwind { ; CHECK-LABEL: mag_pos1_float: ; CHECK: ## %bb.0: -; CHECK-NEXT: andps [[SIGNMASK7]](%rip), %xmm0 -; CHECK-NEXT: movss [[ONE7]](%rip), %xmm1 +; CHECK-NEXT: andps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: orps %xmm1, %xmm0 ; CHECK-NEXT: retq -; %y = call float @copysignf(float 1.0, float %x) ret float %y } @@ -133,10 +127,9 @@ define float @mag_pos1_float(float %x) nounwind { define float @mag_neg1_float(float %x) nounwind { ; CHECK-LABEL: mag_neg1_float: ; CHECK: ## %bb.0: -; CHECK-NEXT: andps [[SIGNMASK8]](%rip), %xmm0 -; CHECK-NEXT: orps [[ONE8]](%rip), %xmm0 +; CHECK-NEXT: andps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: orps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq -; %y = call float @copysignf(float -1.0, float %x) ret float %y } diff --git a/llvm/test/CodeGen/X86/domain-reassignment.mir b/llvm/test/CodeGen/X86/domain-reassignment.mir index 38755344849a0..e24a5ded09a00 100644 --- a/llvm/test/CodeGen/X86/domain-reassignment.mir +++ b/llvm/test/CodeGen/X86/domain-reassignment.mir @@ -167,7 +167,7 @@ body: | bb.1.if: successors: %bb.3(0x80000000) - %14 = VCMPSSZrr %7, %8, 0 + %14 = VCMPSSZrr %7, %8, 0, implicit $mxcsr ; check that cross domain copies are replaced with same domain copies. @@ -177,7 +177,7 @@ body: | bb.2.else: successors: %bb.3(0x80000000) - %12 = VCMPSSZrr %9, %10, 0 + %12 = VCMPSSZrr %9, %10, 0, implicit $mxcsr ; check that cross domain copies are replaced with same domain copies. @@ -292,7 +292,7 @@ body: | %3 = COPY $zmm2 %4 = COPY $zmm3 - %5 = VCMPPDZrri %3, %4, 0 + %5 = VCMPPDZrri %3, %4, 0, implicit $mxcsr %6 = COPY %5 %7 = COPY %6.sub_8bit @@ -411,7 +411,7 @@ body: | %3 = COPY $zmm2 %4 = COPY $zmm3 - %5 = VCMPPSZrri %3, %4, 0 + %5 = VCMPPSZrri %3, %4, 0, implicit $mxcsr %6 = COPY %5 %7 = COPY %6.sub_16bit diff --git a/llvm/test/CodeGen/X86/dwarf-headers.ll b/llvm/test/CodeGen/X86/dwarf-headers.ll index ef626ad7003bb..6159fc29f8623 100644 --- a/llvm/test/CodeGen/X86/dwarf-headers.ll +++ b/llvm/test/CodeGen/X86/dwarf-headers.ll @@ -75,7 +75,7 @@ ; O-5: .debug_info contents: ; O-5: 0x00000000: Compile Unit: {{.*}} version = 0x0005 unit_type = DW_UT_skeleton abbr_offset ; O-5-SAME: DWO_id = 0xccd7e58ef8bf4aa6 -; O-5: 0x00000014: DW_TAG_compile_unit +; O-5: 0x00000014: DW_TAG_skeleton_unit ; ; DWO-5: .debug_info.dwo contents: ; DWO-5: 0x00000000: Type Unit: {{.*}} version = 0x0005 unit_type = DW_UT_split_type abbr_offset diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir index eaf68b9bb210b..3b0fbcba43488 100755 --- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir +++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir @@ -167,14 +167,14 @@ body: | $ymm0 = VPADDWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VPADDWYrr $ymm0, $ymm1 $ymm0 = VPADDWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMULPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VMULPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMULPDYrr $ymm0, $ymm1 - $ymm0 = VMULPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMULPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VMULPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMULPSYrr $ymm0, $ymm1 - $ymm0 = VMULPSZ256rr $ymm0, $ymm1 + ; CHECK: $ymm0 = VMULPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMULPDZ256rm $ymm0, $rip, 1, 
$rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMULPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMULPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VMULPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMULPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMULPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMULPSZ256rr $ymm0, $ymm1, implicit $mxcsr ; CHECK: $ymm0 = VORPDYrm $ymm0, $rip, 1, $rax, 0, $noreg $ymm0 = VORPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VORPDYrr $ymm0, $ymm1 @@ -315,14 +315,14 @@ body: | $ymm0 = VPXORQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VPXORYrr $ymm0, $ymm1 $ymm0 = VPXORQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VADDPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VADDPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VADDPDYrr $ymm0, $ymm1 - $ymm0 = VADDPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VADDPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VADDPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VADDPSYrr $ymm0, $ymm1 - $ymm0 = VADDPSZ256rr $ymm0, $ymm1 + ; CHECK: $ymm0 = VADDPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VADDPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VADDPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VADDPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VADDPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VADDPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VADDPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VADDPSZ256rr $ymm0, $ymm1, implicit $mxcsr ; CHECK: $ymm0 = VANDNPDYrm $ymm0, $rip, 1, $rax, 0, $noreg $ymm0 = VANDNPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VANDNPDYrr $ymm0, $ymm1 @@ -339,46 +339,46 @@ body: | $ymm0 = VANDPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VANDPSYrr $ymm0, $ymm1 $ymm0 = VANDPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VDIVPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VDIVPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VDIVPDYrr $ymm0, $ymm1 - $ymm0 = VDIVPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VDIVPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VDIVPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VDIVPSYrr $ymm0, $ymm1 - $ymm0 = VDIVPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMAXCPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VMAXCPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMAXCPDYrr $ymm0, $ymm1 - $ymm0 = VMAXCPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMAXCPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VMAXCPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMAXCPSYrr $ymm0, $ymm1 - $ymm0 = VMAXCPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMAXPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VMAXPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMAXPDYrr $ymm0, $ymm1 - $ymm0 = VMAXPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMAXPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VMAXPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMAXPSYrr $ymm0, $ymm1 - $ymm0 = VMAXPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMINCPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VMINCPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMINCPDYrr $ymm0, $ymm1 - $ymm0 = VMINCPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMINCPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VMINCPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMINCPSYrr $ymm0, $ymm1 - $ymm0 = VMINCPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMINPDYrm $ymm0, $rip, 1, $rax, 
0, $noreg - $ymm0 = VMINPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMINPDYrr $ymm0, $ymm1 - $ymm0 = VMINPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMINPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VMINPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMINPSYrr $ymm0, $ymm1 - $ymm0 = VMINPSZ256rr $ymm0, $ymm1 + ; CHECK: $ymm0 = VDIVPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VDIVPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VDIVPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VDIVPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VDIVPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VDIVPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VDIVPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VDIVPSZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VMAXCPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMAXCPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMAXCPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMAXCPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VMAXCPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMAXCPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMAXCPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMAXCPSZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VMAXPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMAXPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMAXPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMAXPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VMAXPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMAXPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMAXPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMAXPSZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VMINCPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMINCPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMINCPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMINCPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VMINCPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMINCPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMINCPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMINCPSZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VMINPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMINPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMINPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMINPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VMINPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMINPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMINPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMINPSZ256rr $ymm0, $ymm1, implicit $mxcsr ; CHECK: $ymm0 = VXORPDYrm $ymm0, $rip, 1, $rax, 0, $noreg $ymm0 = VXORPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VXORPDYrr $ymm0, $ymm1 @@ -419,14 +419,14 @@ body: | $ymm0 = VUNPCKLPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VUNPCKLPSYrr $ymm0, $ymm1 $ymm0 = VUNPCKLPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VSUBPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VSUBPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VSUBPDYrr $ymm0, $ymm1 - $ymm0 = VSUBPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VSUBPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 
= VSUBPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VSUBPSYrr $ymm0, $ymm1 - $ymm0 = VSUBPSZ256rr $ymm0, $ymm1 + ; CHECK: $ymm0 = VSUBPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VSUBPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VSUBPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VSUBPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VSUBPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VSUBPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VSUBPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VSUBPSZ256rr $ymm0, $ymm1, implicit $mxcsr ; CHECK: $ymm0 = VPUNPCKHBWYrm $ymm0, $rip, 1, $rax, 0, $noreg $ymm0 = VPUNPCKHBWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VPUNPCKHBWYrr $ymm0, $ymm1 @@ -459,150 +459,150 @@ body: | $ymm0 = VPUNPCKLWDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VPUNPCKLWDYrr $ymm0, $ymm1 $ymm0 = VPUNPCKLWDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VFMADD132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMADD132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADD132PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADD132PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMADD132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMADD132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADD132PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADD132PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMADD213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMADD213PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADD213PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADD213PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMADD213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMADD213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADD213PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADD213PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMADD231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMADD231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADD231PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADD231PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMADD231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMADD231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADD231PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADD231PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMADDSUB132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMADDSUB132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADDSUB132PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADDSUB132PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMADDSUB132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMADDSUB132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADDSUB132PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADDSUB132PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMADDSUB213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMADDSUB213PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADDSUB213PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADDSUB213PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMADDSUB213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMADDSUB213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADDSUB213PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADDSUB213PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMADDSUB231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = 
VFMADDSUB231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADDSUB231PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADDSUB231PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMADDSUB231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMADDSUB231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMADDSUB231PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMADDSUB231PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUB132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUB132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUB132PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUB132PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUB132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUB132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUB132PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUB132PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUB213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUB213PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUB213PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUB213PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUB213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUB213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUB213PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUB213PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUB231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUB231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUB231PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUB231PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUB231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUB231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUB231PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUB231PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUBADD132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUBADD132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUBADD132PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUBADD132PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUBADD132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUBADD132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUBADD132PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUBADD132PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUBADD213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUBADD213PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUBADD213PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUBADD213PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUBADD213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUBADD213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUBADD213PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUBADD213PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUBADD231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUBADD231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUBADD231PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUBADD231PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFMSUBADD231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFMSUBADD231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFMSUBADD231PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFMSUBADD231PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMADD132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMADD132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: 
$ymm0 = VFNMADD132PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMADD132PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMADD132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMADD132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFNMADD132PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMADD132PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMADD213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMADD213PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFNMADD213PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMADD213PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMADD213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMADD213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFNMADD213PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMADD213PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMADD231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMADD231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFNMADD231PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMADD231PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMADD231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMADD231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFNMADD231PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMADD231PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMSUB132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMSUB132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFNMSUB132PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMSUB132PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMSUB132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMSUB132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFNMSUB132PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMSUB132PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMSUB213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMSUB213PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFNMSUB213PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMSUB213PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMSUB213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMSUB213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFNMSUB213PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMSUB213PSZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMSUB231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMSUB231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFNMSUB231PDYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMSUB231PDZ256r $ymm0, $ymm1, $ymm2 - ; CHECK: $ymm0 = VFNMSUB231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - $ymm0 = VFNMSUB231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VFNMSUB231PSYr $ymm0, $ymm1, $ymm2 - $ymm0 = VFNMSUB231PSZ256r $ymm0, $ymm1, $ymm2 + ; CHECK: $ymm0 = VFMADD132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADD132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADD132PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADD132PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMADD132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADD132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADD132PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADD132PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMADD213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADD213PDZ256m $ymm0, $ymm0, $rsi, 
1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADD213PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADD213PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMADD213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADD213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADD213PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADD213PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMADD231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADD231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADD231PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADD231PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMADD231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADD231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADD231PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADD231PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADDSUB132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB132PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADDSUB132PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADDSUB132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB132PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADDSUB132PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADDSUB213PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB213PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADDSUB213PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADDSUB213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB213PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADDSUB213PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADDSUB231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB231PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADDSUB231PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMADDSUB231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMADDSUB231PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMADDSUB231PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUB132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB132PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUB132PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUB132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB132PSYr 
$ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUB132PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUB213PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB213PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUB213PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUB213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB213PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUB213PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUB231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB231PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUB231PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUB231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUB231PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUB231PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUBADD132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD132PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUBADD132PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUBADD132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD132PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUBADD132PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUBADD213PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD213PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUBADD213PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUBADD213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD213PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUBADD213PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUBADD231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD231PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUBADD231PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFMSUBADD231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFMSUBADD231PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFMSUBADD231PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMADD132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD132PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMADD132PDZ256r 
$ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMADD132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD132PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMADD132PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMADD213PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD213PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMADD213PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMADD213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD213PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMADD213PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMADD231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD231PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMADD231PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMADD231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMADD231PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMADD231PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMSUB132PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB132PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMSUB132PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB132PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMSUB132PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB132PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMSUB132PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB213PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMSUB213PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB213PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMSUB213PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB213PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMSUB213PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB213PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMSUB213PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB231PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMSUB231PDZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB231PDYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMSUB231PDZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB231PSYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VFNMSUB231PSZ256m $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VFNMSUB231PSYr $ymm0, $ymm1, $ymm2, implicit $mxcsr + $ymm0 = VFNMSUB231PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr ; CHECK: $ymm0 = VPSRADYri $ymm0, 7 $ymm0 = VPSRADZ256ri $ymm0, 
7 ; CHECK: $ymm0 = VPSRADYrm $ymm0, $rip, 1, $rax, 0, $noreg @@ -811,50 +811,50 @@ body: | $ymm0 = VCVTDQ2PDZ256rm $rdi, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VCVTDQ2PDYrr $xmm0 $ymm0 = VCVTDQ2PDZ256rr $xmm0 - ; CHECK: $ymm0 = VCVTDQ2PSYrm $rdi, 1, $noreg, 0, $noreg - $ymm0 = VCVTDQ2PSZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VCVTDQ2PSYrr $ymm0 - $ymm0 = VCVTDQ2PSZ256rr $ymm0 - ; CHECK: $xmm0 = VCVTPD2DQYrm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTPD2DQYrr $ymm0 - $xmm0 = VCVTPD2DQZ256rr $ymm0 - ; CHECK: $xmm0 = VCVTPD2PSYrm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTPD2PSZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTPD2PSYrr $ymm0 - $xmm0 = VCVTPD2PSZ256rr $ymm0 - ; CHECK: $ymm0 = VCVTPS2DQYrm $rdi, 1, $noreg, 0, $noreg - $ymm0 = VCVTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VCVTPS2DQYrr $ymm0 - $ymm0 = VCVTPS2DQZ256rr $ymm0 - ; CHECK: $ymm0 = VCVTPS2PDYrm $rdi, 1, $noreg, 0, $noreg - $ymm0 = VCVTPS2PDZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VCVTPS2PDYrr $xmm0 - $ymm0 = VCVTPS2PDZ256rr $xmm0 - ; CHECK: VCVTPS2PHYmr $rdi, 1, $noreg, 0, $noreg, $ymm0, 0 - VCVTPS2PHZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm0, 0 - ; CHECK: $xmm0 = VCVTPS2PHYrr $ymm0, 0 - $xmm0 = VCVTPS2PHZ256rr $ymm0, 0 - ; CHECK: $ymm0 = VCVTPH2PSYrm $rdi, 1, $noreg, 0, $noreg - $ymm0 = VCVTPH2PSZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VCVTPH2PSYrr $xmm0 - $ymm0 = VCVTPH2PSZ256rr $xmm0 - ; CHECK: $xmm0 = VCVTTPD2DQYrm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTTPD2DQYrr $ymm0 - $xmm0 = VCVTTPD2DQZ256rr $ymm0 - ; CHECK: $ymm0 = VCVTTPS2DQYrm $rdi, 1, $noreg, 0, $noreg - $ymm0 = VCVTTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VCVTTPS2DQYrr $ymm0 - $ymm0 = VCVTTPS2DQZ256rr $ymm0 - ; CHECK: $ymm0 = VSQRTPDYm $rdi, 1, $noreg, 0, $noreg - $ymm0 = VSQRTPDZ256m $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VSQRTPDYr $ymm0 - $ymm0 = VSQRTPDZ256r $ymm0 - ; CHECK: $ymm0 = VSQRTPSYm $rdi, 1, $noreg, 0, $noreg - $ymm0 = VSQRTPSZ256m $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm0 = VSQRTPSYr $ymm0 - $ymm0 = VSQRTPSZ256r $ymm0 + ; CHECK: $ymm0 = VCVTDQ2PSYrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VCVTDQ2PSZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VCVTDQ2PSYrr $ymm0, implicit $mxcsr + $ymm0 = VCVTDQ2PSZ256rr $ymm0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPD2DQYrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPD2DQYrr $ymm0, implicit $mxcsr + $xmm0 = VCVTPD2DQZ256rr $ymm0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPD2PSYrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTPD2PSZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPD2PSYrr $ymm0, implicit $mxcsr + $xmm0 = VCVTPD2PSZ256rr $ymm0, implicit $mxcsr + ; CHECK: $ymm0 = VCVTPS2DQYrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VCVTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VCVTPS2DQYrr $ymm0, implicit $mxcsr + $ymm0 = VCVTPS2DQZ256rr $ymm0, implicit $mxcsr + ; CHECK: $ymm0 = VCVTPS2PDYrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VCVTPS2PDZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VCVTPS2PDYrr $xmm0, implicit $mxcsr + $ymm0 = VCVTPS2PDZ256rr $xmm0, implicit $mxcsr + ; CHECK: VCVTPS2PHYmr $rdi, 1, $noreg, 0, $noreg, $ymm0, 0, implicit $mxcsr + VCVTPS2PHZ256mr 
$rdi, 1, $noreg, 0, $noreg, $ymm0, 0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPS2PHYrr $ymm0, 0, implicit $mxcsr + $xmm0 = VCVTPS2PHZ256rr $ymm0, 0, implicit $mxcsr + ; CHECK: $ymm0 = VCVTPH2PSYrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VCVTPH2PSZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VCVTPH2PSYrr $xmm0, implicit $mxcsr + $ymm0 = VCVTPH2PSZ256rr $xmm0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTTPD2DQYrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTTPD2DQYrr $ymm0, implicit $mxcsr + $xmm0 = VCVTTPD2DQZ256rr $ymm0, implicit $mxcsr + ; CHECK: $ymm0 = VCVTTPS2DQYrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VCVTTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VCVTTPS2DQYrr $ymm0, implicit $mxcsr + $ymm0 = VCVTTPS2DQZ256rr $ymm0, implicit $mxcsr + ; CHECK: $ymm0 = VSQRTPDYm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VSQRTPDZ256m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VSQRTPDYr $ymm0, implicit $mxcsr + $ymm0 = VSQRTPDZ256r $ymm0, implicit $mxcsr + ; CHECK: $ymm0 = VSQRTPSYm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VSQRTPSZ256m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VSQRTPSYr $ymm0, implicit $mxcsr + $ymm0 = VSQRTPSZ256r $ymm0, implicit $mxcsr ; CHECK: $ymm0 = VPALIGNRYrmi $ymm0, $rdi, 1, $noreg, 0, $noreg, 1 $ymm0 = VPALIGNRZ256rmi $ymm0, $rdi, 1, $noreg, 0, $noreg, 1 ; CHECK: $ymm0 = VPALIGNRYrri $ymm0, $ymm1, 1 @@ -889,14 +889,14 @@ body: | $ymm0 = VSHUFPSZ256rmi $ymm0, $rdi, 1, $noreg, 0, $noreg, -24 ; CHECK: $ymm0 = VSHUFPSYrri $ymm0, $ymm1, -24 $ymm0 = VSHUFPSZ256rri $ymm0, $ymm1, -24 - ; CHECK: $ymm0 = VROUNDPDYm $rip, 1, $rax, 0, $noreg, 15 - $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $ymm0 = VROUNDPDYr $ymm0, 15 - $ymm0 = VRNDSCALEPDZ256rri $ymm0, 15 - ; CHECK: $ymm0 = VROUNDPSYm $rip, 1, $rax, 0, $noreg, 15 - $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $ymm0 = VROUNDPSYr $ymm0, 15 - $ymm0 = VRNDSCALEPSZ256rri $ymm0, 15 + ; CHECK: $ymm0 = VROUNDPDYm $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPDYr $ymm0, 15, implicit $mxcsr + $ymm0 = VRNDSCALEPDZ256rri $ymm0, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPSYm $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPSYr $ymm0, 15, implicit $mxcsr + $ymm0 = VRNDSCALEPSZ256rri $ymm0, 15, implicit $mxcsr ; CHECK: $ymm0 = VPERM2F128rm $ymm0, $rip, 1, $rax, 0, $noreg, 32 $ymm0 = VSHUFF32X4Z256rmi $ymm0, $rip, 1, $rax, 0, $noreg, 228 ; CHECK: $ymm0 = VPERM2F128rr $ymm0, $ymm1, 32 @@ -1075,46 +1075,46 @@ body: | VMOVLPSZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 ; CHECK: $xmm0 = VMOVLPSrm $xmm0, $rdi, 1, $noreg, 0, $noreg $xmm0 = VMOVLPSZ128rm $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VMAXCPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMAXCPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMAXCPDrr $xmm0, $xmm1 - $xmm0 = VMAXCPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMAXCPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMAXCPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMAXCPSrr $xmm0, $xmm1 - $xmm0 = VMAXCPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMAXPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMAXPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - 
; CHECK: $xmm0 = VMAXPDrr $xmm0, $xmm1 - $xmm0 = VMAXPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMAXPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMAXPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMAXPSrr $xmm0, $xmm1 - $xmm0 = VMAXPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMINCPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMINCPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMINCPDrr $xmm0, $xmm1 - $xmm0 = VMINCPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMINCPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMINCPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMINCPSrr $xmm0, $xmm1 - $xmm0 = VMINCPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMINPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMINPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMINPDrr $xmm0, $xmm1 - $xmm0 = VMINPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMINPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMINPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMINPSrr $xmm0, $xmm1 - $xmm0 = VMINPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULPDrr $xmm0, $xmm1 - $xmm0 = VMULPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULPSrr $xmm0, $xmm1 - $xmm0 = VMULPSZ128rr $xmm0, $xmm1 + ; CHECK: $xmm0 = VMAXCPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXCPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXCPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMAXCPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMAXCPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXCPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXCPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMAXCPSZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMAXPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMAXPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMAXPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMAXPSZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMINCPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMINCPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINCPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMINCPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMINCPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMINCPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINCPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMINCPSZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMINPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMINPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMINPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMINPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMINPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMINPSZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMULPDrm 
$xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMULPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULPSZ128rr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VORPDrm $xmm0, $rip, 1, $rax, 0, $noreg $xmm0 = VORPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VORPDrr $xmm0, $xmm1 @@ -1295,14 +1295,14 @@ body: | $xmm0 = VPSUBWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VPSUBWrr $xmm0, $xmm1 $xmm0 = VPSUBWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VADDPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VADDPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDPDrr $xmm0, $xmm1 - $xmm0 = VADDPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VADDPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VADDPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDPSrr $xmm0, $xmm1 - $xmm0 = VADDPSZ128rr $xmm0, $xmm1 + ; CHECK: $xmm0 = VADDPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VADDPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDPSZ128rr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VANDNPDrm $xmm0, $rip, 1, $rax, 0, $noreg $xmm0 = VANDNPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VANDNPDrr $xmm0, $xmm1 @@ -1319,14 +1319,14 @@ body: | $xmm0 = VANDPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VANDPSrr $xmm0, $xmm1 $xmm0 = VANDPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVPDrr $xmm0, $xmm1 - $xmm0 = VDIVPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVPSrr $xmm0, $xmm1 - $xmm0 = VDIVPSZ128rr $xmm0, $xmm1 + ; CHECK: $xmm0 = VDIVPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VDIVPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVPSZ128rr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VPXORrm $xmm0, $rip, 1, $rax, 0, $noreg $xmm0 = VPXORDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VPXORrr $xmm0, $xmm1 @@ -1335,14 +1335,14 @@ body: | $xmm0 = VPXORQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VPXORrr $xmm0, $xmm1 $xmm0 = VPXORQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VSUBPDrr $xmm0, $xmm1 - $xmm0 = VSUBPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = 
VSUBPSrr $xmm0, $xmm1 - $xmm0 = VSUBPSZ128rr $xmm0, $xmm1 + ; CHECK: $xmm0 = VSUBPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VSUBPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBPSZ128rr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VXORPDrm $xmm0, $rip, 1, $rax, 0, $noreg $xmm0 = VXORPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VXORPDrr $xmm0, $xmm1 @@ -1423,150 +1423,150 @@ body: | $xmm0 = VUNPCKLPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VUNPCKLPSrr $xmm0, $xmm1 $xmm0 = VUNPCKLPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VFMADD132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD132PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD132PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD132PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD132PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD213PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD213PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD213PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD213PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD231PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD231PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD231PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD231PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADDSUB132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADDSUB132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADDSUB132PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADDSUB132PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADDSUB132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADDSUB132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADDSUB132PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADDSUB132PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADDSUB213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADDSUB213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADDSUB213PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADDSUB213PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADDSUB213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADDSUB213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADDSUB213PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADDSUB213PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADDSUB231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADDSUB231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADDSUB231PDr $xmm0, $xmm1, $xmm2 - $xmm0 
= VFMADDSUB231PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADDSUB231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADDSUB231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADDSUB231PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADDSUB231PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB132PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB132PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB132PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB132PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB213PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB213PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB213PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB213PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB231PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB231PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB231PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB231PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUBADD132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUBADD132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUBADD132PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUBADD132PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUBADD132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUBADD132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUBADD132PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUBADD132PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUBADD213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUBADD213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUBADD213PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUBADD213PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUBADD213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUBADD213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUBADD213PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUBADD213PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUBADD231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUBADD231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUBADD231PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUBADD231PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUBADD231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUBADD231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUBADD231PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUBADD231PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD132PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD132PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 
0, $noreg - $xmm0 = VFNMADD132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD132PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD132PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD213PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD213PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD213PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD213PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD231PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD231PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD231PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD231PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB132PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB132PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB132PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB132PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB213PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB213PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB213PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB213PSZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB231PDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB231PDZ128r $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB231PSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB231PSZ128r $xmm0, $xmm1, $xmm2 + ; CHECK: $xmm0 = VFMADD132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD132PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD132PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD132PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD132PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD213PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD213PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: 
$xmm0 = VFMADD213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD213PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD213PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD231PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD231PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADDSUB132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB132PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADDSUB132PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADDSUB132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB132PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADDSUB132PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADDSUB213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB213PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADDSUB213PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADDSUB213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB213PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADDSUB213PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADDSUB231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB231PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADDSUB231PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADDSUB231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADDSUB231PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADDSUB231PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB132PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB132PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB132PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB132PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = 
VFMSUB213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB213PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB213PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB231PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB231PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUBADD132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD132PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUBADD132PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUBADD132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD132PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUBADD132PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUBADD213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD213PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUBADD213PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUBADD213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD213PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUBADD213PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUBADD231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD231PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUBADD231PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUBADD231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUBADD231PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUBADD231PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD132PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD132PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: 
$xmm0 = VFNMADD132PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD132PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD213PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD213PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD231PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD231PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB132PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB132PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB132PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB132PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB213PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB213PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB213PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB213PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB231PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB231PDZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB231PDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB231PDZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB231PSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB231PSZ128m $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB231PSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB231PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr ; CHECK: $xmm0 = VPSLLDri $xmm0, 7 $xmm0 = VPSLLDZ128ri $xmm0, 7 ; CHECK: $xmm0 = VPSLLDrm $xmm0, $rip, 1, $rax, 0, $noreg @@ -1653,50 +1653,50 @@ body: | $xmm0 = VPERMILPSZ128rm $xmm0, $rdi, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPERMILPSrr $xmm0, $xmm1 $xmm0 = VPERMILPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = 
VCVTPH2PSrm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTPH2PSZ128rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTPH2PSrr $xmm0 - $xmm0 = VCVTPH2PSZ128rr $xmm0 + ; CHECK: $xmm0 = VCVTPH2PSrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTPH2PSZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPH2PSrr $xmm0, implicit $mxcsr + $xmm0 = VCVTPH2PSZ128rr $xmm0, implicit $mxcsr ; CHECK: $xmm0 = VCVTDQ2PDrm $rdi, 1, $noreg, 0, $noreg $xmm0 = VCVTDQ2PDZ128rm $rdi, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VCVTDQ2PDrr $xmm0 $xmm0 = VCVTDQ2PDZ128rr $xmm0 - ; CHECK: $xmm0 = VCVTDQ2PSrm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTDQ2PSZ128rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTDQ2PSrr $xmm0 - $xmm0 = VCVTDQ2PSZ128rr $xmm0 - ; CHECK: $xmm0 = VCVTPD2DQrm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTPD2DQZ128rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTPD2DQrr $xmm0 - $xmm0 = VCVTPD2DQZ128rr $xmm0 - ; CHECK: $xmm0 = VCVTPD2PSrm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTPD2PSZ128rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTPD2PSrr $xmm0 - $xmm0 = VCVTPD2PSZ128rr $xmm0 - ; CHECK: $xmm0 = VCVTPS2DQrm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTPS2DQrr $xmm0 - $xmm0 = VCVTPS2DQZ128rr $xmm0 - ; CHECK: $xmm0 = VCVTPS2PDrm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTPS2PDZ128rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTPS2PDrr $xmm0 - $xmm0 = VCVTPS2PDZ128rr $xmm0 - ; CHECK: $xmm0 = VCVTTPD2DQrm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTTPD2DQZ128rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTTPD2DQrr $xmm0 - $xmm0 = VCVTTPD2DQZ128rr $xmm0 - ; CHECK: $xmm0 = VCVTTPS2DQrm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTTPS2DQrr $xmm0 - $xmm0 = VCVTTPS2DQZ128rr $xmm0 - ; CHECK: $xmm0 = VSQRTPDm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VSQRTPDZ128m $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VSQRTPDr $xmm0 - $xmm0 = VSQRTPDZ128r $xmm0 - ; CHECK: $xmm0 = VSQRTPSm $rdi, 1, $noreg, 0, $noreg - $xmm0 = VSQRTPSZ128m $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VSQRTPSr $xmm0 - $xmm0 = VSQRTPSZ128r $xmm0 + ; CHECK: $xmm0 = VCVTDQ2PSrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTDQ2PSZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTDQ2PSrr $xmm0, implicit $mxcsr + $xmm0 = VCVTDQ2PSZ128rr $xmm0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPD2DQrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTPD2DQZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPD2DQrr $xmm0, implicit $mxcsr + $xmm0 = VCVTPD2DQZ128rr $xmm0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPD2PSrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTPD2PSZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPD2PSrr $xmm0, implicit $mxcsr + $xmm0 = VCVTPD2PSZ128rr $xmm0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPS2DQrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPS2DQrr $xmm0, implicit $mxcsr + $xmm0 = VCVTPS2DQZ128rr $xmm0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPS2PDrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTPS2PDZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPS2PDrr $xmm0, implicit $mxcsr + $xmm0 = VCVTPS2PDZ128rr $xmm0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTTPD2DQrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTTPD2DQZ128rm $rdi, 1, $noreg, 0, 
$noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTTPD2DQrr $xmm0, implicit $mxcsr + $xmm0 = VCVTTPD2DQZ128rr $xmm0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTTPS2DQrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTTPS2DQrr $xmm0, implicit $mxcsr + $xmm0 = VCVTTPS2DQZ128rr $xmm0, implicit $mxcsr + ; CHECK: $xmm0 = VSQRTPDm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VSQRTPDZ128m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSQRTPDr $xmm0, implicit $mxcsr + $xmm0 = VSQRTPDZ128r $xmm0, implicit $mxcsr + ; CHECK: $xmm0 = VSQRTPSm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VSQRTPSZ128m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSQRTPSr $xmm0, implicit $mxcsr + $xmm0 = VSQRTPSZ128r $xmm0, implicit $mxcsr ; CHECK: $xmm0 = VMOVDDUPrm $rdi, 1, $noreg, 0, $noreg $xmm0 = VMOVDDUPZ128rm $rdi, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VMOVDDUPrr $xmm0 @@ -1763,10 +1763,10 @@ body: | $xmm0 = VBROADCASTI32X2Z128m $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VPBROADCASTQrr $xmm0 $xmm0 = VBROADCASTI32X2Z128r $xmm0 - ; CHECK: $xmm0 = VCVTPS2PHrr $xmm0, 2 - $xmm0 = VCVTPS2PHZ128rr $xmm0, 2 - ; CHECK: VCVTPS2PHmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 2 - VCVTPS2PHZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm0, 2 + ; CHECK: $xmm0 = VCVTPS2PHrr $xmm0, 2, implicit $mxcsr + $xmm0 = VCVTPS2PHZ128rr $xmm0, 2, implicit $mxcsr + ; CHECK: VCVTPS2PHmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 2, implicit $mxcsr + VCVTPS2PHZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm0, 2, implicit $mxcsr ; CHECK: $xmm0 = VPABSBrm $rip, 1, $rax, 0, $noreg $xmm0 = VPABSBZ128rm $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VPABSBrr $xmm0 @@ -1791,14 +1791,14 @@ body: | $xmm0 = VALIGNQZ128rmi $xmm0, $rip, 1, $rax, 0, $noreg, 1 ; CHECK: $xmm0 = VPALIGNRrri $xmm0, $xmm1, 8 $xmm0 = VALIGNQZ128rri $xmm0, $xmm1, 1 - ; CHECK: $xmm0 = VROUNDPDm $rip, 1, $rax, 0, $noreg, 15 - $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $xmm0 = VROUNDPDr $xmm0, 15 - $xmm0 = VRNDSCALEPDZ128rri $xmm0, 15 - ; CHECK: $xmm0 = VROUNDPSm $rip, 1, $rax, 0, $noreg, 15 - $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $xmm0 = VROUNDPSr $xmm0, 15 - $xmm0 = VRNDSCALEPSZ128rri $xmm0, 15 + ; CHECK: $xmm0 = VROUNDPDm $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPDr $xmm0, 15, implicit $mxcsr + $xmm0 = VRNDSCALEPDZ128rri $xmm0, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPSm $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPSr $xmm0, 15, implicit $mxcsr + $xmm0 = VRNDSCALEPSZ128rri $xmm0, 15, implicit $mxcsr RET 0, $zmm0, $zmm1 ... 
@@ -1810,310 +1810,310 @@ name: evex_scalar_to_vex_test body: | bb.0: - ; CHECK: $xmm0 = VADDSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VADDSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VADDSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDSDrr $xmm0, $xmm1 - $xmm0 = VADDSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VADDSDrr_Int $xmm0, $xmm1 - $xmm0 = VADDSDZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VADDSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VADDSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VADDSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDSSrr $xmm0, $xmm1 - $xmm0 = VADDSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VADDSSrr_Int $xmm0, $xmm1 - $xmm0 = VADDSSZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVSDrr $xmm0, $xmm1 - $xmm0 = VDIVSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVSDrr_Int $xmm0, $xmm1 - $xmm0 = VDIVSDZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVSSrr $xmm0, $xmm1 - $xmm0 = VDIVSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVSSrr_Int $xmm0, $xmm1 - $xmm0 = VDIVSSZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VMAXCSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMAXCSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMAXCSDrr $xmm0, $xmm1 - $xmm0 = VMAXCSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMAXCSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMAXCSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMAXCSSrr $xmm0, $xmm1 - $xmm0 = VMAXCSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMAXSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMAXSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMAXSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMAXSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMAXSDrr $xmm0, $xmm1 - $xmm0 = VMAXSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMAXSDrr_Int $xmm0, $xmm1 - $xmm0 = VMAXSDZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VMAXSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMAXSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMAXSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMAXSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMAXSSrr $xmm0, $xmm1 - $xmm0 = VMAXSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMAXSSrr_Int $xmm0, $xmm1 - $xmm0 = VMAXSSZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VMINCSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMINCSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMINCSDrr $xmm0, $xmm1 - $xmm0 = VMINCSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMINCSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMINCSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMINCSSrr $xmm0, $xmm1 - $xmm0 = VMINCSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMINSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMINSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMINSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMINSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMINSDrr $xmm0, $xmm1 - $xmm0 = VMINSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMINSDrr_Int $xmm0, $xmm1 - $xmm0 = VMINSDZrr_Int $xmm0, $xmm1 - ; 
CHECK: $xmm0 = VMINSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMINSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMINSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMINSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMINSSrr $xmm0, $xmm1 - $xmm0 = VMINSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMINSSrr_Int $xmm0, $xmm1 - $xmm0 = VMINSSZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULSDrr $xmm0, $xmm1 - $xmm0 = VMULSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULSDrr_Int $xmm0, $xmm1 - $xmm0 = VMULSDZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULSSrr $xmm0, $xmm1 - $xmm0 = VMULSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULSSrr_Int $xmm0, $xmm1 - $xmm0 = VMULSSZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VSUBSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VSUBSDrr $xmm0, $xmm1 - $xmm0 = VSUBSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBSDrr_Int $xmm0, $xmm1 - $xmm0 = VSUBSDZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VSUBSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VSUBSSrr $xmm0, $xmm1 - $xmm0 = VSUBSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBSSrr_Int $xmm0, $xmm1 - $xmm0 = VSUBSSZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VFMADD132SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD132SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD132SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD132SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD132SDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD132SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD132SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD132SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD132SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD132SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD132SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD132SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD132SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD132SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD132SSr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD132SSZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD213SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD213SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD213SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD213SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD213SDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD213SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD213SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD213SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD213SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD213SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = 
VFMADD213SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD213SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD213SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD213SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD213SSr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD213SSZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD231SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD231SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD231SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD231SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD231SDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD231SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD231SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD231SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD231SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD231SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD231SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMADD231SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMADD231SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD231SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMADD231SSr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMADD231SSZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB132SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB132SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB132SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB132SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB132SDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB132SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB132SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB132SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB132SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB132SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB132SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB132SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB132SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB132SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB132SSr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB132SSZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB213SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB213SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB213SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB213SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB213SDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB213SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB213SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB213SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB213SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB213SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB213SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB213SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB213SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB213SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB213SSr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB213SSZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB231SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB231SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB231SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB231SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB231SDr $xmm0, 
$xmm1, $xmm2 - $xmm0 = VFMSUB231SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB231SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB231SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB231SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB231SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB231SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFMSUB231SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFMSUB231SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB231SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFMSUB231SSr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFMSUB231SSZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD132SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD132SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD132SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD132SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD132SDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD132SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD132SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD132SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD132SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD132SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD132SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD132SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD132SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD132SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD132SSr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD132SSZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD213SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD213SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD213SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD213SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD213SDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD213SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD213SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD213SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD213SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD213SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD213SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD213SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD213SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD213SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD213SSr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD213SSZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD231SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD231SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD231SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD231SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD231SDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD231SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD231SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD231SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD231SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD231SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD231SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMADD231SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMADD231SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMADD231SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMADD231SSr_Int $xmm0, $xmm1, $xmm2 - 
$xmm0 = VFNMADD231SSZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB132SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB132SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB132SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB132SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB132SDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB132SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB132SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB132SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB132SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB132SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB132SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB132SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB132SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB132SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB132SSr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB132SSZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB213SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB213SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB213SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB213SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB213SDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB213SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB213SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB213SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB213SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB213SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB213SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB213SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB213SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB213SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB213SSr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB213SSZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB231SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB231SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB231SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB231SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB231SDr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB231SDZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB231SDr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB231SDZr_Int $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB231SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB231SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB231SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - $xmm0 = VFNMSUB231SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VFNMSUB231SSr $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB231SSZr $xmm0, $xmm1, $xmm2 - ; CHECK: $xmm0 = VFNMSUB231SSr_Int $xmm0, $xmm1, $xmm2 - $xmm0 = VFNMSUB231SSZr_Int $xmm0, $xmm1, $xmm2 + ; CHECK: $xmm0 = VADDSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VADDSDrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDSDZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VADDSSrm 
$xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDSSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VADDSSrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDSSZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSDrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVSDZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSSrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVSSZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMAXCSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXCSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXCSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMAXCSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMAXCSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXCSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXCSSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMAXCSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMAXSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSDrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMAXSDZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMAXSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSSrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMAXSSZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMINCSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMINCSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINCSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMINCSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMINCSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMINCSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINCSSrr $xmm0, 
$xmm1, implicit $mxcsr + $xmm0 = VMINCSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMINSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMINSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMINSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMINSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMINSDrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMINSDZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMINSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMINSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMINSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINSSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMINSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMINSSrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMINSSZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMULSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMULSDrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULSDZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMULSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMULSSrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULSSZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSDrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBSDZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSSrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBSSZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD132SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD132SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD132SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD132SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, 
implicit $mxcsr + ; CHECK: $xmm0 = VFMADD132SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD132SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD132SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD132SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD132SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD132SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD132SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD132SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD132SSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD132SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD132SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD132SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD213SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD213SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD213SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD213SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD213SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD213SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD213SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD213SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD213SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD213SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD213SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD213SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD213SSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD213SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD213SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD213SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD231SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD231SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD231SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD231SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD231SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMADD231SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231SSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD231SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMADD231SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMADD231SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB132SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB132SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit 
$mxcsr + ; CHECK: $xmm0 = VFMSUB132SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB132SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB132SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB132SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB132SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB132SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB132SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB132SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB132SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB132SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB132SSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB132SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB132SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB132SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB213SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB213SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB213SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB213SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB213SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB213SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213SSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB213SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB213SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB213SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB231SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB231SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB231SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB231SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB231SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFMSUB231SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231SSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB231SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFMSUB231SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFMSUB231SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr 
+ ; CHECK: $xmm0 = VFNMADD132SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD132SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD132SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD132SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD132SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD132SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD132SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD132SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD132SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD132SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD132SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD132SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD132SSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD132SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD132SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD132SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD213SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD213SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD213SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD213SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD213SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD213SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213SSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD213SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD213SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD213SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD231SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD231SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD231SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD231SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD231SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMADD231SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231SSr $xmm0, $xmm1, $xmm2, 
implicit $mxcsr + $xmm0 = VFNMADD231SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMADD231SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMADD231SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB132SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB132SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB132SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB132SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB132SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB132SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132SSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB132SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB132SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB132SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB213SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB213SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB213SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB213SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB213SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB213SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213SSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB213SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB213SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB213SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB231SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB231SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB231SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB231SDZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB231SDr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB231SDZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB231SDr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB231SDZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB231SSm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB231SSZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = 
VFNMSUB231SSm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VFNMSUB231SSZm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB231SSr $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB231SSZr $xmm0, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm0 = VFNMSUB231SSr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr + $xmm0 = VFNMSUB231SSZr_Int $xmm0, $xmm1, $xmm2, implicit $mxcsr ; CHECK: VPEXTRBmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 3 VPEXTRBZmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 3 ; CHECK: $eax = VPEXTRBrr $xmm0, 1 @@ -2148,34 +2148,34 @@ body: | $xmm0 = VPINSRWZrm $xmm0, $rsi, 1, $noreg, 0, $noreg, 3 ; CHECK: $xmm0 = VPINSRWrr $xmm0, $edi, 5 $xmm0 = VPINSRWZrr $xmm0, $edi, 5 - ; CHECK: $xmm0 = VSQRTSDm $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VSQRTSDZm $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VSQRTSDm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VSQRTSDZm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VSQRTSDr $xmm0, $noreg - $xmm0 = VSQRTSDZr $xmm0, $noreg - ; CHECK: $xmm0 = VSQRTSDr_Int $xmm0, $noreg - $xmm0 = VSQRTSDZr_Int $xmm0, $noreg - ; CHECK: $xmm0 = VSQRTSSm $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VSQRTSSZm $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VSQRTSSm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VSQRTSSZm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VSQRTSSr $xmm0, $xmm1 - $xmm0 = VSQRTSSZr $xmm0, $xmm1 - ; CHECK: $xmm0 = VSQRTSSr_Int $xmm0, $xmm1 - $xmm0 = VSQRTSSZr_Int $xmm0, $xmm1 - ; CHECK: $rdi = VCVTSD2SI64rr_Int $xmm0 - $rdi = VCVTSD2SI64Zrr_Int $xmm0 - ; CHECK: $edi = VCVTSD2SIrr_Int $xmm0 - $edi = VCVTSD2SIZrr_Int $xmm0 - ; CHECK: $xmm0 = VCVTSD2SSrm $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTSD2SSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTSD2SSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTSD2SSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTSD2SSrr $xmm0, $xmm1 - $xmm0 = VCVTSD2SSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VCVTSD2SSrr_Int $xmm0, $xmm1 - $xmm0 = VCVTSD2SSZrr_Int $xmm0, $xmm1 + ; CHECK: $xmm0 = VSQRTSDm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VSQRTSDZm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSQRTSDm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VSQRTSDZm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSQRTSDr $xmm0, $noreg, implicit $mxcsr + $xmm0 = VSQRTSDZr $xmm0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSQRTSDr_Int $xmm0, $noreg, implicit $mxcsr + $xmm0 = VSQRTSDZr_Int $xmm0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSQRTSSm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VSQRTSSZm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSQRTSSm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VSQRTSSZm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSQRTSSr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSQRTSSZr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VSQRTSSr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSQRTSSZr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $rdi = VCVTSD2SI64rr_Int $xmm0, implicit $mxcsr + $rdi = VCVTSD2SI64Zrr_Int $xmm0, implicit $mxcsr + ; CHECK: $edi = VCVTSD2SIrr_Int $xmm0, implicit $mxcsr + $edi = VCVTSD2SIZrr_Int $xmm0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSD2SSrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTSD2SSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: 
$xmm0 = VCVTSD2SSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTSD2SSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSD2SSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VCVTSD2SSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSD2SSrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VCVTSD2SSZrr_Int $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VCVTSI2SDrm $xmm0, $rdi, 1, $noreg, 0, $noreg $xmm0 = VCVTSI2SDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VCVTSI2SDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg @@ -2184,78 +2184,78 @@ body: | $xmm0 = VCVTSI2SDZrr $xmm0, $edi ; CHECK: $xmm0 = VCVTSI2SDrr_Int $xmm0, $edi $xmm0 = VCVTSI2SDZrr_Int $xmm0, $edi - ; CHECK: $xmm0 = VCVTSI2SSrm $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTSI2SSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTSI2SSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTSI2SSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTSI2SSrr $xmm0, $edi - $xmm0 = VCVTSI2SSZrr $xmm0, $edi - ; CHECK: $xmm0 = VCVTSI2SSrr_Int $xmm0, $edi - $xmm0 = VCVTSI2SSZrr_Int $xmm0, $edi - ; CHECK: $xmm0 = VCVTSI642SDrm $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTSI642SDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTSI642SDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTSI642SDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTSI642SDrr $xmm0, $rdi - $xmm0 = VCVTSI642SDZrr $xmm0, $rdi - ; CHECK: $xmm0 = VCVTSI642SDrr_Int $xmm0, $rdi - $xmm0 = VCVTSI642SDZrr_Int $xmm0, $rdi - ; CHECK: $xmm0 = VCVTSI642SSrm $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTSI642SSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTSI642SSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTSI642SSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTSI642SSrr $xmm0, $rdi - $xmm0 = VCVTSI642SSZrr $xmm0, $rdi - ; CHECK: $xmm0 = VCVTSI642SSrr_Int $xmm0, $rdi - $xmm0 = VCVTSI642SSZrr_Int $xmm0, $rdi - ; CHECK: $xmm0 = VCVTSS2SDrm $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTSS2SDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTSS2SDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - $xmm0 = VCVTSS2SDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VCVTSS2SDrr $xmm0, $xmm1 - $xmm0 = VCVTSS2SDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VCVTSS2SDrr_Int $xmm0, $xmm1 - $xmm0 = VCVTSS2SDZrr_Int $xmm0, $xmm1 - ; CHECK: $rdi = VCVTSS2SI64rm_Int $rdi, 1, $noreg, 0, $noreg - $rdi = VCVTSS2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg - ; CHECK: $rdi = VCVTSS2SI64rr_Int $xmm0 - $rdi = VCVTSS2SI64Zrr_Int $xmm0 - ; CHECK: $edi = VCVTSS2SIrm_Int $rdi, 1, $noreg, 0, $noreg - $edi = VCVTSS2SIZrm_Int $rdi, 1, $noreg, 0, $noreg - ; CHECK: $edi = VCVTSS2SIrr_Int $xmm0 - $edi = VCVTSS2SIZrr_Int $xmm0 - ; CHECK: $rdi = VCVTTSD2SI64rm $rdi, 1, $noreg, 0, $noreg - $rdi = VCVTTSD2SI64Zrm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $rdi = VCVTTSD2SI64rm_Int $rdi, 1, $noreg, 0, $noreg - $rdi = VCVTTSD2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg - ; CHECK: $rdi = VCVTTSD2SI64rr $xmm0 - $rdi = VCVTTSD2SI64Zrr $xmm0 - ; CHECK: $rdi = VCVTTSD2SI64rr_Int $xmm0 - $rdi = VCVTTSD2SI64Zrr_Int $xmm0 - ; CHECK: $edi = VCVTTSD2SIrm $rdi, 1, $noreg, 0, $noreg - $edi = VCVTTSD2SIZrm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $edi = VCVTTSD2SIrm_Int $rdi, 1, $noreg, 0, $noreg - $edi = VCVTTSD2SIZrm_Int $rdi, 1, $noreg, 0, $noreg - ; CHECK: $edi = VCVTTSD2SIrr $xmm0 - $edi = VCVTTSD2SIZrr $xmm0 - ; CHECK: $edi = VCVTTSD2SIrr_Int $xmm0 - $edi = VCVTTSD2SIZrr_Int $xmm0 - ; 
CHECK: $rdi = VCVTTSS2SI64rm $rdi, 1, $noreg, 0, $noreg - $rdi = VCVTTSS2SI64Zrm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $rdi = VCVTTSS2SI64rm_Int $rdi, 1, $noreg, 0, $noreg - $rdi = VCVTTSS2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg - ; CHECK: $rdi = VCVTTSS2SI64rr $xmm0 - $rdi = VCVTTSS2SI64Zrr $xmm0 - ; CHECK: $rdi = VCVTTSS2SI64rr_Int $xmm0 - $rdi = VCVTTSS2SI64Zrr_Int $xmm0 - ; CHECK: $edi = VCVTTSS2SIrm $rdi, 1, $noreg, 0, $noreg - $edi = VCVTTSS2SIZrm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $edi = VCVTTSS2SIrm_Int $rdi, 1, $noreg, 0, $noreg - $edi = VCVTTSS2SIZrm_Int $rdi, 1, $noreg, 0, $noreg - ; CHECK: $edi = VCVTTSS2SIrr $xmm0 - $edi = VCVTTSS2SIZrr $xmm0 - ; CHECK: $edi = VCVTTSS2SIrr_Int $xmm0 - $edi = VCVTTSS2SIZrr_Int $xmm0 + ; CHECK: $xmm0 = VCVTSI2SSrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTSI2SSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSI2SSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTSI2SSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSI2SSrr $xmm0, $edi, implicit $mxcsr + $xmm0 = VCVTSI2SSZrr $xmm0, $edi, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSI2SSrr_Int $xmm0, $edi, implicit $mxcsr + $xmm0 = VCVTSI2SSZrr_Int $xmm0, $edi, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSI642SDrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTSI642SDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSI642SDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTSI642SDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSI642SDrr $xmm0, $rdi, implicit $mxcsr + $xmm0 = VCVTSI642SDZrr $xmm0, $rdi, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSI642SDrr_Int $xmm0, $rdi, implicit $mxcsr + $xmm0 = VCVTSI642SDZrr_Int $xmm0, $rdi, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSI642SSrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTSI642SSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSI642SSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTSI642SSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSI642SSrr $xmm0, $rdi, implicit $mxcsr + $xmm0 = VCVTSI642SSZrr $xmm0, $rdi, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSI642SSrr_Int $xmm0, $rdi, implicit $mxcsr + $xmm0 = VCVTSI642SSZrr_Int $xmm0, $rdi, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSS2SDrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTSS2SDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSS2SDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VCVTSS2SDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSS2SDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VCVTSS2SDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VCVTSS2SDrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VCVTSS2SDZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $rdi = VCVTSS2SI64rm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $rdi = VCVTSS2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $rdi = VCVTSS2SI64rr_Int $xmm0, implicit $mxcsr + $rdi = VCVTSS2SI64Zrr_Int $xmm0, implicit $mxcsr + ; CHECK: $edi = VCVTSS2SIrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $edi = VCVTSS2SIZrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $edi = VCVTSS2SIrr_Int $xmm0, implicit $mxcsr + $edi = VCVTSS2SIZrr_Int $xmm0, implicit $mxcsr + ; CHECK: $rdi = VCVTTSD2SI64rm $rdi, 1, $noreg, 0, 
$noreg, implicit $mxcsr + $rdi = VCVTTSD2SI64Zrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $rdi = VCVTTSD2SI64rm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $rdi = VCVTTSD2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $rdi = VCVTTSD2SI64rr $xmm0, implicit $mxcsr + $rdi = VCVTTSD2SI64Zrr $xmm0, implicit $mxcsr + ; CHECK: $rdi = VCVTTSD2SI64rr_Int $xmm0, implicit $mxcsr + $rdi = VCVTTSD2SI64Zrr_Int $xmm0, implicit $mxcsr + ; CHECK: $edi = VCVTTSD2SIrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $edi = VCVTTSD2SIZrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $edi = VCVTTSD2SIrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $edi = VCVTTSD2SIZrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $edi = VCVTTSD2SIrr $xmm0, implicit $mxcsr + $edi = VCVTTSD2SIZrr $xmm0, implicit $mxcsr + ; CHECK: $edi = VCVTTSD2SIrr_Int $xmm0, implicit $mxcsr + $edi = VCVTTSD2SIZrr_Int $xmm0, implicit $mxcsr + ; CHECK: $rdi = VCVTTSS2SI64rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $rdi = VCVTTSS2SI64Zrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $rdi = VCVTTSS2SI64rm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $rdi = VCVTTSS2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $rdi = VCVTTSS2SI64rr $xmm0, implicit $mxcsr + $rdi = VCVTTSS2SI64Zrr $xmm0, implicit $mxcsr + ; CHECK: $rdi = VCVTTSS2SI64rr_Int $xmm0, implicit $mxcsr + $rdi = VCVTTSS2SI64Zrr_Int $xmm0, implicit $mxcsr + ; CHECK: $edi = VCVTTSS2SIrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $edi = VCVTTSS2SIZrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $edi = VCVTTSS2SIrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $edi = VCVTTSS2SIZrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $edi = VCVTTSS2SIrr $xmm0, implicit $mxcsr + $edi = VCVTTSS2SIZrr $xmm0, implicit $mxcsr + ; CHECK: $edi = VCVTTSS2SIrr_Int $xmm0, implicit $mxcsr + $edi = VCVTTSS2SIZrr_Int $xmm0, implicit $mxcsr ; CHECK: $xmm0 = VMOV64toSDrr $rdi $xmm0 = VMOV64toSDZrr $rdi ; CHECK: $xmm0 = VMOVDI2SSrr $eax @@ -2354,22 +2354,22 @@ body: | $xmm0 = VINSERTPSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, 1 ; CHECK: $xmm0 = VINSERTPSrr $xmm0, $xmm0, 1 $xmm0 = VINSERTPSZrr $xmm0, $xmm0, 1 - ; CHECK: $xmm0 = VROUNDSDm $xmm0, $rip, 1, $rax, 0, $noreg, 15 - $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $xmm0 = VROUNDSDr $xmm0, $xmm1, 15 - $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 15 - ; CHECK: $xmm0 = VROUNDSSm $xmm0, $rip, 1, $rax, 0, $noreg, 15 - $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $xmm0 = VROUNDSSr $xmm0, $xmm1, 15 - $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 15 - ; CHECK: $xmm0 = VROUNDSDm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15 - $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $xmm0 = VROUNDSDr_Int $xmm0, $xmm1, 15 - $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 15 - ; CHECK: $xmm0 = VROUNDSSm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15 - $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $xmm0 = VROUNDSSr_Int $xmm0, $xmm1, 15 - $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 15 + ; CHECK: $xmm0 = VROUNDSDm $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDr $xmm0, $xmm1, 15, implicit $mxcsr + $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSm $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $rax, 0, $noreg, 15, 
implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSr $xmm0, $xmm1, 15, implicit $mxcsr + $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDr_Int $xmm0, $xmm1, 15, implicit $mxcsr + $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSr_Int $xmm0, $xmm1, 15, implicit $mxcsr + $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 15, implicit $mxcsr RET 0, $zmm0, $zmm1 ... @@ -2530,14 +2530,14 @@ body: | $ymm16 = VPADDWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VPADDWZ256rr $ymm16, $ymm1 $ymm16 = VPADDWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMULPDZ256rr $ymm16, $ymm1 - $ymm16 = VMULPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMULPSZ256rr $ymm16, $ymm1 - $ymm16 = VMULPSZ256rr $ymm16, $ymm1 + ; CHECK: $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMULPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMULPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMULPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMULPSZ256rr $ymm16, $ymm1, implicit $mxcsr ; CHECK: $ymm16 = VORPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg $ymm16 = VORPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VORPDZ256rr $ymm16, $ymm1 @@ -2678,14 +2678,14 @@ body: | $ymm16 = VPXORQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VPXORQZ256rr $ymm16, $ymm1 $ymm16 = VPXORQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VADDPDZ256rr $ymm16, $ymm1 - $ymm16 = VADDPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VADDPSZ256rr $ymm16, $ymm1 - $ymm16 = VADDPSZ256rr $ymm16, $ymm1 + ; CHECK: $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VADDPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VADDPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VADDPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VADDPSZ256rr $ymm16, $ymm1, implicit $mxcsr ; CHECK: $ymm16 = VANDNPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg $ymm16 = VANDNPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VANDNPDZ256rr $ymm16, $ymm1 @@ -2702,46 +2702,46 @@ body: | $ymm16 = VANDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VANDPSZ256rr $ymm16, $ymm1 $ymm16 = VANDPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VDIVPDZ256rm 
$ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VDIVPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VDIVPDZ256rr $ymm16, $ymm1 - $ymm16 = VDIVPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VDIVPSZ256rr $ymm16, $ymm1 - $ymm16 = VDIVPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMAXCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMAXCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMAXCPDZ256rr $ymm16, $ymm1 - $ymm16 = VMAXCPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMAXCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMAXCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMAXCPSZ256rr $ymm16, $ymm1 - $ymm16 = VMAXCPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMAXPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMAXPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMAXPDZ256rr $ymm16, $ymm1 - $ymm16 = VMAXPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMAXPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMAXPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMAXPSZ256rr $ymm16, $ymm1 - $ymm16 = VMAXPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMINCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMINCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMINCPDZ256rr $ymm16, $ymm1 - $ymm16 = VMINCPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMINCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMINCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMINCPSZ256rr $ymm16, $ymm1 - $ymm16 = VMINCPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMINPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMINPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMINPDZ256rr $ymm16, $ymm1 - $ymm16 = VMINPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMINPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMINPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMINPSZ256rr $ymm16, $ymm1 - $ymm16 = VMINPSZ256rr $ymm16, $ymm1 + ; CHECK: $ymm16 = VDIVPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VDIVPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VDIVPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VDIVPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VDIVPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VDIVPSZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VMAXCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VMAXCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMAXCPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMAXCPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VMAXCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VMAXCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMAXCPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMAXCPSZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VMAXPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VMAXPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMAXPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMAXPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VMAXPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = 
VMAXPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMAXPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMAXPSZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VMINCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VMINCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMINCPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMINCPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VMINCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VMINCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMINCPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMINCPSZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VMINPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VMINPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMINPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMINPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VMINPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VMINPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMINPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMINPSZ256rr $ymm16, $ymm1, implicit $mxcsr ; CHECK: $ymm16 = VXORPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg $ymm16 = VXORPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VXORPDZ256rr $ymm16, $ymm1 @@ -2782,14 +2782,14 @@ body: | $ymm16 = VUNPCKLPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VUNPCKLPSZ256rr $ymm16, $ymm1 $ymm16 = VUNPCKLPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VSUBPDZ256rr $ymm16, $ymm1 - $ymm16 = VSUBPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VSUBPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VSUBPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VSUBPSZ256rr $ymm16, $ymm1 - $ymm16 = VSUBPSZ256rr $ymm16, $ymm1 + ; CHECK: $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VSUBPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VSUBPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VSUBPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VSUBPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VSUBPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VSUBPSZ256rr $ymm16, $ymm1, implicit $mxcsr ; CHECK: $ymm16 = VPUNPCKHBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg $ymm16 = VPUNPCKHBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VPUNPCKHBWZ256rr $ymm16, $ymm1 @@ -2822,150 +2822,150 @@ body: | $ymm16 = VPUNPCKLWDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VPUNPCKLWDZ256rr $ymm16, $ymm1 $ymm16 = VPUNPCKLWDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VFMADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADD132PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADD132PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADD132PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADD132PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, 
$noreg - $ymm16 = VFMADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADD213PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADD213PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADD213PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADD213PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADD231PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADD231PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMADD231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMADD231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADD231PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADD231PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMADDSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMADDSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADDSUB132PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADDSUB132PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMADDSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMADDSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADDSUB132PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADDSUB132PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMADDSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMADDSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADDSUB213PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADDSUB213PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMADDSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMADDSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADDSUB213PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADDSUB213PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMADDSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMADDSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADDSUB231PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADDSUB231PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMADDSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMADDSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMADDSUB231PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMADDSUB231PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUB132PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMSUB132PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUB132PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMSUB132PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUB213PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMSUB213PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUB213PSZ256r $ymm16, 
$ymm1, $ymm2 - $ymm16 = VFMSUB213PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUB231PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMSUB231PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUB231PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMSUB231PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUBADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUBADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUBADD132PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMSUBADD132PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUBADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUBADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUBADD132PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMSUBADD132PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUBADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUBADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUBADD213PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMSUBADD213PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUBADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUBADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUBADD213PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMSUBADD213PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUBADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUBADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUBADD231PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMSUBADD231PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFMSUBADD231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFMSUBADD231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFMSUBADD231PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFMSUBADD231PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMADD132PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMADD132PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMADD132PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMADD132PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMADD213PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMADD213PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMADD213PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMADD213PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMADD231PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMADD231PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMADD231PSZ256m $ymm16, 
$ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMADD231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMADD231PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMADD231PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMSUB132PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMSUB132PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMSUB132PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMSUB132PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMSUB213PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMSUB213PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMSUB213PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMSUB213PSZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMSUB231PDZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMSUB231PDZ256r $ymm16, $ymm1, $ymm2 - ; CHECK: $ymm16 = VFNMSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - $ymm16 = VFNMSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VFNMSUB231PSZ256r $ymm16, $ymm1, $ymm2 - $ymm16 = VFNMSUB231PSZ256r $ymm16, $ymm1, $ymm2 + ; CHECK: $ymm16 = VFMADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADD132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADD132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADD132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADD132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADD213PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADD213PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADD213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADD213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADD231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADD231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMADD231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADD231PSZ256m $ymm16, 
$ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADD231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADD231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADDSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADDSUB132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADDSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADDSUB132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADDSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB213PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADDSUB213PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADDSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADDSUB213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADDSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADDSUB231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMADDSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMADDSUB231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMADDSUB231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUB132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUB132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUB132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUB132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUB213PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUB213PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUB213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUB213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = 
VFMSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUB231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUB231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUB231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUB231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUBADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUBADD132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUBADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUBADD132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUBADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD213PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUBADD213PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUBADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUBADD213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUBADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUBADD231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFMSUBADD231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFMSUBADD231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFMSUBADD231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMADD132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMADD132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMADD132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMADD213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD213PDZ256r $ymm16, 
$ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMADD213PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMADD213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMADD213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMADD231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMADD231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMADD231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMADD231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMADD231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMSUB132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMSUB132PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMSUB132PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMSUB132PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMSUB213PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB213PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMSUB213PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMSUB213PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMSUB213PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMSUB231PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMSUB231PDZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VFNMSUB231PSZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VFNMSUB231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr + $ymm16 = VFNMSUB231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr ; CHECK: $ymm16 = VPSRADZ256ri $ymm16, 7 $ymm16 = VPSRADZ256ri $ymm16, 7 ; CHECK: $ymm16 = VPSRADZ256rm $ymm16, $rip, 1, $rax, 0, $noreg @@ -3173,51 +3173,51 @@ body: | ; CHECK: $ymm16 = VCVTDQ2PDZ256rm $rdi, 1, $noreg, 0, $noreg $ymm16 = VCVTDQ2PDZ256rm $rdi, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VCVTDQ2PDZ256rr $xmm0 - $ymm16 = VCVTDQ2PDZ256rr $xmm0 - ; CHECK: $ymm16 = VCVTDQ2PSZ256rm $rdi, 1, $noreg, 0, $noreg - $ymm16 = VCVTDQ2PSZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: 
$ymm16 = VCVTDQ2PSZ256rr $ymm16 - $ymm16 = VCVTDQ2PSZ256rr $ymm16 - ; CHECK: $xmm16 = VCVTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg - $xmm16 = VCVTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VCVTPD2DQZ256rr $ymm16 - $xmm16 = VCVTPD2DQZ256rr $ymm16 - ; CHECK: $xmm16 = VCVTPD2PSZ256rm $rdi, 1, $noreg, 0, $noreg - $xmm16 = VCVTPD2PSZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VCVTPD2PSZ256rr $ymm16 - $xmm16 = VCVTPD2PSZ256rr $ymm16 - ; CHECK: $ymm16 = VCVTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg - $ymm16 = VCVTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VCVTPS2DQZ256rr $ymm16 - $ymm16 = VCVTPS2DQZ256rr $ymm16 - ; CHECK: $ymm16 = VCVTPS2PDZ256rm $rdi, 1, $noreg, 0, $noreg - $ymm16 = VCVTPS2PDZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VCVTPS2PDZ256rr $xmm0 - $ymm16 = VCVTPS2PDZ256rr $xmm0 - ; CHECK: VCVTPS2PHZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16, 0 - VCVTPS2PHZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16, 0 - ; CHECK: $xmm0 = VCVTPS2PHZ256rr $ymm16, 0 - $xmm0 = VCVTPS2PHZ256rr $ymm16, 0 - ; CHECK: $ymm16 = VCVTPH2PSZ256rm $rdi, 1, $noreg, 0, $noreg - $ymm16 = VCVTPH2PSZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VCVTPH2PSZ256rr $xmm16 - $ymm16 = VCVTPH2PSZ256rr $xmm16 - ; CHECK: $xmm16 = VCVTTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg - $xmm16 = VCVTTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VCVTTPD2DQZ256rr $ymm16 - $xmm16 = VCVTTPD2DQZ256rr $ymm16 - ; CHECK: $ymm16 = VCVTTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg - $ymm16 = VCVTTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VCVTTPS2DQZ256rr $ymm16 - $ymm16 = VCVTTPS2DQZ256rr $ymm16 - ; CHECK: $ymm16 = VSQRTPDZ256m $rdi, 1, $noreg, 0, $noreg - $ymm16 = VSQRTPDZ256m $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VSQRTPDZ256r $ymm16 - $ymm16 = VSQRTPDZ256r $ymm16 - ; CHECK: $ymm16 = VSQRTPSZ256m $rdi, 1, $noreg, 0, $noreg - $ymm16 = VSQRTPSZ256m $rdi, 1, $noreg, 0, $noreg - ; CHECK: $ymm16 = VSQRTPSZ256r $ymm16 - $ymm16 = VSQRTPSZ256r $ymm16 + $ymm16 = VCVTDQ2PDZ256rr $xmm0, implicit $mxcsr + ; CHECK: $ymm16 = VCVTDQ2PSZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VCVTDQ2PSZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VCVTDQ2PSZ256rr $ymm16, implicit $mxcsr + $ymm16 = VCVTDQ2PSZ256rr $ymm16, implicit $mxcsr + ; CHECK: $xmm16 = VCVTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VCVTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTPD2DQZ256rr $ymm16, implicit $mxcsr + $xmm16 = VCVTPD2DQZ256rr $ymm16, implicit $mxcsr + ; CHECK: $xmm16 = VCVTPD2PSZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VCVTPD2PSZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTPD2PSZ256rr $ymm16, implicit $mxcsr + $xmm16 = VCVTPD2PSZ256rr $ymm16, implicit $mxcsr + ; CHECK: $ymm16 = VCVTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VCVTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VCVTPS2DQZ256rr $ymm16, implicit $mxcsr + $ymm16 = VCVTPS2DQZ256rr $ymm16, implicit $mxcsr + ; CHECK: $ymm16 = VCVTPS2PDZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VCVTPS2PDZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VCVTPS2PDZ256rr $xmm0, implicit $mxcsr + $ymm16 = VCVTPS2PDZ256rr $xmm0, implicit $mxcsr + ; CHECK: VCVTPS2PHZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16, 0, implicit $mxcsr + VCVTPS2PHZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16, 0, implicit $mxcsr + ; CHECK: $xmm0 = VCVTPS2PHZ256rr $ymm16, 0, 
implicit $mxcsr + $xmm0 = VCVTPS2PHZ256rr $ymm16, 0, implicit $mxcsr + ; CHECK: $ymm16 = VCVTPH2PSZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VCVTPH2PSZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VCVTPH2PSZ256rr $xmm16, implicit $mxcsr + $ymm16 = VCVTPH2PSZ256rr $xmm16, implicit $mxcsr + ; CHECK: $xmm16 = VCVTTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VCVTTPD2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTTPD2DQZ256rr $ymm16, implicit $mxcsr + $xmm16 = VCVTTPD2DQZ256rr $ymm16, implicit $mxcsr + ; CHECK: $ymm16 = VCVTTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VCVTTPS2DQZ256rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VCVTTPS2DQZ256rr $ymm16, implicit $mxcsr + $ymm16 = VCVTTPS2DQZ256rr $ymm16, implicit $mxcsr + ; CHECK: $ymm16 = VSQRTPDZ256m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VSQRTPDZ256m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VSQRTPDZ256r $ymm16, implicit $mxcsr + $ymm16 = VSQRTPDZ256r $ymm16, implicit $mxcsr + ; CHECK: $ymm16 = VSQRTPSZ256m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VSQRTPSZ256m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VSQRTPSZ256r $ymm16, implicit $mxcsr + $ymm16 = VSQRTPSZ256r $ymm16, implicit $mxcsr ; CHECK: $ymm16 = VPALIGNRZ256rmi $ymm16, $rdi, 1, $noreg, 0, $noreg, 1 $ymm16 = VPALIGNRZ256rmi $ymm16, $rdi, 1, $noreg, 0, $noreg, 1 ; CHECK: $ymm16 = VPALIGNRZ256rri $ymm16, $ymm1, 1 @@ -3252,22 +3252,22 @@ body: | $ymm16 = VSHUFPSZ256rmi $ymm16, $rip, 1, $rax, 0, $noreg, -24 ; CHECK: $ymm16 = VSHUFPSZ256rri $ymm16, $ymm1, -24 $ymm16 = VSHUFPSZ256rri $ymm16, $ymm1, -24 - ; CHECK: $ymm16 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 15 - $ymm16 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $ymm16 = VRNDSCALEPDZ256rri $ymm16, 15 - $ymm16 = VRNDSCALEPDZ256rri $ymm16, 15 - ; CHECK: $ymm16 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 15 - $ymm16 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $ymm16 = VRNDSCALEPSZ256rri $ymm16, 15 - $ymm16 = VRNDSCALEPSZ256rri $ymm16, 15 - ; CHECK: $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 31 - $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 31 - ; CHECK: $ymm0 = VRNDSCALEPDZ256rri $ymm0, 31 - $ymm0 = VRNDSCALEPDZ256rri $ymm0, 31 - ; CHECK: $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 31 - $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 31 - ; CHECK: $ymm0 = VRNDSCALEPSZ256rri $ymm0, 31 - $ymm0 = VRNDSCALEPSZ256rri $ymm0, 31 + ; CHECK: $ymm16 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $ymm16 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm16 = VRNDSCALEPDZ256rri $ymm16, 15, implicit $mxcsr + $ymm16 = VRNDSCALEPDZ256rri $ymm16, 15, implicit $mxcsr + ; CHECK: $ymm16 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $ymm16 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm16 = VRNDSCALEPSZ256rri $ymm16, 15, implicit $mxcsr + $ymm16 = VRNDSCALEPSZ256rri $ymm16, 15, implicit $mxcsr + ; CHECK: $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + ; CHECK: $ymm0 = VRNDSCALEPDZ256rri $ymm0, 31, implicit $mxcsr + $ymm0 = VRNDSCALEPDZ256rri $ymm0, 31, implicit $mxcsr + ; CHECK: $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + $ymm0 = 
VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + ; CHECK: $ymm0 = VRNDSCALEPSZ256rri $ymm0, 31, implicit $mxcsr + $ymm0 = VRNDSCALEPSZ256rri $ymm0, 31, implicit $mxcsr ; CHECK: $ymm16 = VSHUFF32X4Z256rmi $ymm16, $rip, 1, $rax, 0, $noreg, 228 $ymm16 = VSHUFF32X4Z256rmi $ymm16, $rip, 1, $rax, 0, $noreg, 228 ; CHECK: $ymm16 = VSHUFF32X4Z256rri $ymm16, $ymm1, 228 @@ -3446,46 +3446,46 @@ body: | VMOVLPSZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 ; CHECK: $xmm16 = VMOVLPSZ128rm $xmm16, $rdi, 1, $noreg, 0, $noreg $xmm16 = VMOVLPSZ128rm $xmm16, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VMAXCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMAXCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMAXCPDZ128rr $xmm16, $xmm1 - $xmm16 = VMAXCPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMAXCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMAXCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMAXCPSZ128rr $xmm16, $xmm1 - $xmm16 = VMAXCPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMAXPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMAXPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMAXPDZ128rr $xmm16, $xmm1 - $xmm16 = VMAXPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMAXPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMAXPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMAXPSZ128rr $xmm16, $xmm1 - $xmm16 = VMAXPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMINCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMINCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMINCPDZ128rr $xmm16, $xmm1 - $xmm16 = VMINCPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMINCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMINCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMINCPSZ128rr $xmm16, $xmm1 - $xmm16 = VMINCPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMINPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMINPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMINPDZ128rr $xmm16, $xmm1 - $xmm16 = VMINPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMINPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMINPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMINPSZ128rr $xmm16, $xmm1 - $xmm16 = VMINPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMULPDZ128rr $xmm16, $xmm1 - $xmm16 = VMULPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMULPSZ128rr $xmm16, $xmm1 - $xmm16 = VMULPSZ128rr $xmm16, $xmm1 + ; CHECK: $xmm16 = VMAXCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXCPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMAXCPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMAXCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXCPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMAXCPSZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMAXPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMAXPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMAXPSZ128rm 
$xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMAXPSZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMINCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMINCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINCPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMINCPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMINCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMINCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINCPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMINCPSZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMINPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMINPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMINPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMINPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMINPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMINPSZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMULPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMULPSZ128rr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg $xmm16 = VORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VORPDZ128rr $xmm16, $xmm1 @@ -3666,14 +3666,14 @@ body: | $xmm16 = VPSUBWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VPSUBWZ128rr $xmm16, $xmm1 $xmm16 = VPSUBWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VADDPDZ128rr $xmm16, $xmm1 - $xmm16 = VADDPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VADDPSZ128rr $xmm16, $xmm1 - $xmm16 = VADDPSZ128rr $xmm16, $xmm1 + ; CHECK: $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VADDPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VADDPSZ128rr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VANDNPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg $xmm16 = VANDNPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VANDNPDZ128rr $xmm16, $xmm1 @@ -3690,14 +3690,14 @@ body: | $xmm16 = VANDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VANDPSZ128rr $xmm16, $xmm1 $xmm16 = VANDPSZ128rr $xmm16, $xmm1 - ; CHECK: 
$xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VDIVPDZ128rr $xmm16, $xmm1 - $xmm16 = VDIVPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VDIVPSZ128rr $xmm16, $xmm1 - $xmm16 = VDIVPSZ128rr $xmm16, $xmm1 + ; CHECK: $xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VDIVPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VDIVPSZ128rr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VPXORDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg $xmm16 = VPXORDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VPXORDZ128rr $xmm16, $xmm1 @@ -3706,14 +3706,14 @@ body: | $xmm16 = VPXORQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VPXORQZ128rr $xmm16, $xmm1 $xmm16 = VPXORQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VSUBPDZ128rr $xmm16, $xmm1 - $xmm16 = VSUBPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VSUBPSZ128rr $xmm16, $xmm1 - $xmm16 = VSUBPSZ128rr $xmm16, $xmm1 + ; CHECK: $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VSUBPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VSUBPSZ128rr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VXORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg $xmm16 = VXORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VXORPDZ128rr $xmm16, $xmm1 @@ -3794,150 +3794,150 @@ body: | $xmm16 = VUNPCKLPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VUNPCKLPSZ128rr $xmm16, $xmm1 $xmm16 = VUNPCKLPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VFMADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMADD132PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADD132PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMADD132PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADD132PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMADD213PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADD213PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = 
VFMADD213PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADD213PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMADD231PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADD231PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMADD231PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADD231PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMADDSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADDSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMADDSUB132PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADDSUB132PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMADDSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADDSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMADDSUB132PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADDSUB132PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMADDSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADDSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMADDSUB213PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADDSUB213PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMADDSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADDSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMADDSUB213PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADDSUB213PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMADDSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADDSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMADDSUB231PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADDSUB231PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMADDSUB231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMADDSUB231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMADDSUB231PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMADDSUB231PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUB132PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUB132PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUB132PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUB132PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUB213PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUB213PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUB213PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUB213PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUB231PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUB231PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUB231PSZ128m 
$xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUB231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUB231PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUB231PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUBADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUBADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUBADD132PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUBADD132PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUBADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUBADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUBADD132PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUBADD132PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUBADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUBADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUBADD213PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUBADD213PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUBADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUBADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUBADD213PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUBADD213PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUBADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUBADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUBADD231PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUBADD231PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFMSUBADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFMSUBADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFMSUBADD231PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFMSUBADD231PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMADD132PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMADD132PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMADD132PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMADD132PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMADD213PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMADD213PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMADD213PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMADD213PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMADD231PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMADD231PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMADD231PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMADD231PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, 
$noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMSUB132PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMSUB132PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMSUB132PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMSUB132PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMSUB213PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMSUB213PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMSUB213PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMSUB213PSZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMSUB231PDZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMSUB231PDZ128r $xmm16, $xmm1, $xmm2 - ; CHECK: $xmm16 = VFNMSUB231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - $xmm16 = VFNMSUB231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VFNMSUB231PSZ128r $xmm16, $xmm1, $xmm2 - $xmm16 = VFNMSUB231PSZ128r $xmm16, $xmm1, $xmm2 + ; CHECK: $xmm16 = VFMADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VFMADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VFMADD132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + $xmm16 = VFMADD132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm16 = VFMADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VFMADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VFMADD132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + $xmm16 = VFMADD132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm16 = VFMADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VFMADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VFMADD213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + $xmm16 = VFMADD213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm16 = VFMADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VFMADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VFMADD213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + $xmm16 = VFMADD213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm16 = VFMADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VFMADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VFMADD231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + $xmm16 = VFMADD231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm16 = VFMADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VFMADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VFMADD231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + $xmm16 = VFMADD231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr + ; CHECK: $xmm16 = VFMADDSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VFMADDSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, 
$noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADDSUB132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADDSUB132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADDSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADDSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADDSUB132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADDSUB132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADDSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADDSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADDSUB213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADDSUB213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADDSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADDSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADDSUB213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADDSUB213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADDSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADDSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADDSUB231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADDSUB231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADDSUB231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADDSUB231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADDSUB231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADDSUB231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUBADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUBADD132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUBADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUBADD132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUBADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUBADD213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUBADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUBADD213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUBADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUBADD231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUBADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUBADD231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUBADD231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB132PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB132PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB132PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB213PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB213PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB213PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB213PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB231PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB231PDZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB231PSZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr
; CHECK: $xmm16 = VPSLLDZ128ri $xmm16, 7
$xmm16 = VPSLLDZ128ri $xmm16, 7
; CHECK: $xmm16 = VPSLLDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg
@@ -4024,50 +4024,50 @@ body: |
$xmm16 = VPERMILPSZ128rm $xmm16, $rdi, 1, $noreg, 0, $noreg
; CHECK: $xmm16 = VPERMILPSZ128rr $xmm16, $xmm1
$xmm16 = VPERMILPSZ128rr $xmm16, $xmm1
- ; CHECK: $xmm16 = VCVTPH2PSZ128rm $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTPH2PSZ128rm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTPH2PSZ128rr $xmm16
- $xmm16 = VCVTPH2PSZ128rr $xmm16
+ ; CHECK: $xmm16 = VCVTPH2PSZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VCVTPH2PSZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTPH2PSZ128rr $xmm16, implicit $mxcsr
+ $xmm16 = VCVTPH2PSZ128rr $xmm16, implicit $mxcsr
; CHECK: $xmm16 = VCVTDQ2PDZ128rm $rdi, 1, $noreg, 0, $noreg
$xmm16 = VCVTDQ2PDZ128rm $rdi, 1, $noreg, 0, $noreg
; CHECK: $xmm16 = VCVTDQ2PDZ128rr $xmm16
$xmm16 = VCVTDQ2PDZ128rr $xmm16
- ; CHECK: $xmm16 = VCVTDQ2PSZ128rm $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTDQ2PSZ128rm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTDQ2PSZ128rr $xmm16
- $xmm16 = VCVTDQ2PSZ128rr $xmm16
- ; CHECK: $xmm16 = VCVTPD2DQZ128rm $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTPD2DQZ128rm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTPD2DQZ128rr $xmm16
- $xmm16 = VCVTPD2DQZ128rr $xmm16
- ; CHECK: $xmm16 = VCVTPD2PSZ128rm $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTPD2PSZ128rm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTPD2PSZ128rr $xmm16
- $xmm16 = VCVTPD2PSZ128rr $xmm16
- ; CHECK: $xmm16 = VCVTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTPS2DQZ128rr $xmm16
- $xmm16 = VCVTPS2DQZ128rr $xmm16
- ; CHECK: $xmm16 = VCVTPS2PDZ128rm $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTPS2PDZ128rm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTPS2PDZ128rr $xmm16
- $xmm16 = VCVTPS2PDZ128rr $xmm16
- ; CHECK: $xmm16 = VCVTTPD2DQZ128rm $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTTPD2DQZ128rm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTTPD2DQZ128rr $xmm16
- $xmm16 = VCVTTPD2DQZ128rr $xmm16
- ; CHECK: $xmm16 = VCVTTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTTPS2DQZ128rr $xmm16
- $xmm16 = VCVTTPS2DQZ128rr $xmm16
- ; CHECK: $xmm16 = VSQRTPDZ128m $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VSQRTPDZ128m $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VSQRTPDZ128r $xmm16
- $xmm16 = VSQRTPDZ128r $xmm16
- ; CHECK: $xmm16 = VSQRTPSZ128m $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VSQRTPSZ128m $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VSQRTPSZ128r $xmm16
- $xmm16 = VSQRTPSZ128r $xmm16
+ ; CHECK: $xmm16 = VCVTDQ2PSZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VCVTDQ2PSZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTDQ2PSZ128rr $xmm16, implicit $mxcsr
+ $xmm16 = VCVTDQ2PSZ128rr $xmm16, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTPD2DQZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VCVTPD2DQZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTPD2DQZ128rr $xmm16, implicit $mxcsr
+ $xmm16 = VCVTPD2DQZ128rr $xmm16, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTPD2PSZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VCVTPD2PSZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTPD2PSZ128rr $xmm16, implicit $mxcsr
+ $xmm16 = VCVTPD2PSZ128rr $xmm16, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VCVTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTPS2DQZ128rr $xmm16, implicit $mxcsr
+ $xmm16 = VCVTPS2DQZ128rr $xmm16, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTPS2PDZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VCVTPS2PDZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTPS2PDZ128rr $xmm16, implicit $mxcsr
+ $xmm16 = VCVTPS2PDZ128rr $xmm16, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTTPD2DQZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VCVTTPD2DQZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTTPD2DQZ128rr $xmm16, implicit $mxcsr
+ $xmm16 = VCVTTPD2DQZ128rr $xmm16, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VCVTTPS2DQZ128rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTTPS2DQZ128rr $xmm16, implicit $mxcsr
+ $xmm16 = VCVTTPS2DQZ128rr $xmm16, implicit $mxcsr
+ ; CHECK: $xmm16 = VSQRTPDZ128m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VSQRTPDZ128m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VSQRTPDZ128r $xmm16, implicit $mxcsr
+ $xmm16 = VSQRTPDZ128r $xmm16, implicit $mxcsr
+ ; CHECK: $xmm16 = VSQRTPSZ128m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VSQRTPSZ128m $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VSQRTPSZ128r $xmm16, implicit $mxcsr
+ $xmm16 = VSQRTPSZ128r $xmm16, implicit $mxcsr
; CHECK: $xmm16 = VMOVDDUPZ128rm $rdi, 1, $noreg, 0, $noreg
$xmm16 = VMOVDDUPZ128rm $rdi, 1, $noreg, 0, $noreg
; CHECK: $xmm16 = VMOVDDUPZ128rr $xmm16
$xmm16 = VMOVDDUPZ128rr $xmm16
@@ -4134,10 +4134,10 @@ body: |
$xmm16 = VBROADCASTI32X2Z128m $rip, 1, $rax, 0, $noreg
; CHECK: $xmm16 = VBROADCASTI32X2Z128r $xmm0
$xmm16 = VBROADCASTI32X2Z128r $xmm0
- ; CHECK: $xmm16 = VCVTPS2PHZ128rr $xmm16, 2
- $xmm16 = VCVTPS2PHZ128rr $xmm16, 2
- ; CHECK: VCVTPS2PHZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16, 2
- VCVTPS2PHZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16, 2
+ ; CHECK: $xmm16 = VCVTPS2PHZ128rr $xmm16, 2, implicit $mxcsr
+ $xmm16 = VCVTPS2PHZ128rr $xmm16, 2, implicit $mxcsr
+ ; CHECK: VCVTPS2PHZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16, 2, implicit $mxcsr
+ VCVTPS2PHZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16, 2, implicit $mxcsr
; CHECK: $xmm16 = VPABSBZ128rm $rip, 1, $rax, 0, $noreg
$xmm16 = VPABSBZ128rm $rip, 1, $rax, 0, $noreg
; CHECK: $xmm16 = VPABSBZ128rr $xmm16
@@ -4162,22 +4162,22 @@ body: |
$xmm16 = VINSERTPSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, 1
; CHECK: $xmm16 = VINSERTPSZrr $xmm16, $xmm16, 1
$xmm16 = VINSERTPSZrr $xmm16, $xmm16, 1
- ; CHECK: $xmm16 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 15
- $xmm16 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 15
- ; CHECK: $xmm16 = VRNDSCALEPDZ128rri $xmm16, 15
- $xmm16 = VRNDSCALEPDZ128rri $xmm16, 15
- ; CHECK: $xmm16 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 15
- $xmm16 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 15
- ; CHECK: $xmm16 = VRNDSCALEPSZ128rri $xmm16, 15
- $xmm16 = VRNDSCALEPSZ128rri $xmm16, 15
- ; CHECK: $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 31
- $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 31
- ; CHECK: $xmm0 = VRNDSCALEPDZ128rri $xmm0, 31
- $xmm0 = VRNDSCALEPDZ128rri $xmm0, 31
- ; CHECK: $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 31
- $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 31
- ; CHECK: $xmm0 = VRNDSCALEPSZ128rri $xmm0, 31
- $xmm0 = VRNDSCALEPSZ128rri $xmm0, 31
+ ; CHECK: $xmm16 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+ $xmm16 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm16 = VRNDSCALEPDZ128rri $xmm16, 15, implicit $mxcsr
+ $xmm16 = VRNDSCALEPDZ128rri $xmm16, 15, implicit $mxcsr
+ ; CHECK: $xmm16 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+ $xmm16 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm16 = VRNDSCALEPSZ128rri $xmm16, 15, implicit $mxcsr
+ $xmm16 = VRNDSCALEPSZ128rri $xmm16, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+ $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+ ; CHECK: $xmm0 = VRNDSCALEPDZ128rri $xmm0, 31, implicit $mxcsr
+ $xmm0 = VRNDSCALEPDZ128rri $xmm0, 31, implicit $mxcsr
+ ; CHECK: $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+ $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+ ; CHECK: $xmm0 = VRNDSCALEPSZ128rri $xmm0, 31, implicit $mxcsr
+ $xmm0 = VRNDSCALEPSZ128rri $xmm0, 31, implicit $mxcsr
RET 0, $zmm0, $zmm1
...
@@ -4188,310 +4188,310 @@ body: |
name: evex_scalar_to_evex_test
body: |
bb.0:
- ; CHECK: $xmm16 = VADDSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VADDSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VADDSDZrr $xmm16, $xmm1
- $xmm16 = VADDSDZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VADDSDZrr_Int $xmm16, $xmm1
- $xmm16 = VADDSDZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VADDSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VADDSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VADDSSZrr $xmm16, $xmm1
- $xmm16 = VADDSSZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VADDSSZrr_Int $xmm16, $xmm1
- $xmm16 = VADDSSZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VDIVSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VDIVSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VDIVSDZrr $xmm16, $xmm1
- $xmm16 = VDIVSDZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VDIVSDZrr_Int $xmm16, $xmm1
- $xmm16 = VDIVSDZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VDIVSSZrr $xmm16, $xmm1
- $xmm16 = VDIVSSZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VDIVSSZrr_Int $xmm16, $xmm1
- $xmm16 = VDIVSSZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VMAXCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMAXCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMAXCSDZrr $xmm16, $xmm1
- $xmm16 = VMAXCSDZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VMAXCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMAXCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMAXCSSZrr $xmm16, $xmm1
- $xmm16 = VMAXCSSZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VMAXSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMAXSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMAXSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMAXSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMAXSDZrr $xmm16, $xmm1
- $xmm16 = VMAXSDZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VMAXSDZrr_Int $xmm16, $xmm1
- $xmm16 = VMAXSDZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VMAXSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMAXSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMAXSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMAXSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMAXSSZrr $xmm16, $xmm1
- $xmm16 = VMAXSSZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VMAXSSZrr_Int $xmm16, $xmm1
- $xmm16 = VMAXSSZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VMINCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMINCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMINCSDZrr $xmm16, $xmm1
- $xmm16 = VMINCSDZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VMINCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMINCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMINCSSZrr $xmm16, $xmm1
- $xmm16 = VMINCSSZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VMINSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMINSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMINSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMINSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMINSDZrr $xmm16, $xmm1
- $xmm16 = VMINSDZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VMINSDZrr_Int $xmm16, $xmm1
- $xmm16 = VMINSDZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VMINSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMINSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMINSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMINSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMINSSZrr $xmm16, $xmm1
- $xmm16 = VMINSSZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VMINSSZrr_Int $xmm16, $xmm1
- $xmm16 = VMINSSZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VMULSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMULSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMULSDZrr $xmm16, $xmm1
- $xmm16 = VMULSDZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VMULSDZrr_Int $xmm16, $xmm1
- $xmm16 = VMULSDZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VMULSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMULSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VMULSSZrr $xmm16, $xmm1
- $xmm16 = VMULSSZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VMULSSZrr_Int $xmm16, $xmm1
- $xmm16 = VMULSSZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VSUBSDZrr $xmm16, $xmm1
- $xmm16 = VSUBSDZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VSUBSDZrr_Int $xmm16, $xmm1
- $xmm16 = VSUBSDZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg
- ; CHECK: $xmm16 = VSUBSSZrr $xmm16, $xmm1
- $xmm16 = VSUBSSZrr $xmm16, $xmm1
- ; CHECK: $xmm16 = VSUBSSZrr_Int $xmm16, $xmm1
- $xmm16 = VSUBSSZrr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VFMADD132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD132SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD132SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMADD132SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD132SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMADD132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD132SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD132SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMADD132SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD132SSZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMADD213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD213SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD213SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMADD213SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD213SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMADD213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD213SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD213SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMADD213SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD213SSZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMADD231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD231SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD231SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMADD231SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD231SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMADD231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMADD231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMADD231SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD231SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMADD231SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMADD231SSZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB132SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB132SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB132SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB132SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB132SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB132SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB132SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB132SSZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB213SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB213SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB213SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB213SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB213SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB213SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB213SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB213SSZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB231SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB231SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB231SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB231SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFMSUB231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFMSUB231SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB231SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFMSUB231SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFMSUB231SSZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD132SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD132SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD132SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD132SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD132SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD132SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD132SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD132SSZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD213SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD213SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD213SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD213SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD213SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD213SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD213SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD213SSZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD231SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD231SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD231SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD231SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMADD231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMADD231SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD231SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMADD231SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMADD231SSZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB132SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB132SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB132SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB132SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB132SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB132SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB132SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB132SSZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB213SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB213SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB213SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB213SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB213SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB213SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB213SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB213SSZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB231SDZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB231SDZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB231SDZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB231SDZr_Int $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- $xmm16 = VFNMSUB231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VFNMSUB231SSZr $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB231SSZr $xmm16, $xmm1, $xmm2
- ; CHECK: $xmm16 = VFNMSUB231SSZr_Int $xmm16, $xmm1, $xmm2
- $xmm16 = VFNMSUB231SSZr_Int $xmm16, $xmm1, $xmm2
+ ; CHECK: $xmm16 = VADDSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VADDSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VADDSDZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VADDSDZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VADDSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VADDSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VADDSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VADDSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VADDSSZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VADDSSZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VADDSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VADDSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VDIVSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VDIVSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VDIVSDZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VDIVSDZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VDIVSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VDIVSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VDIVSSZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VDIVSSZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VDIVSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VDIVSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMAXCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXCSDZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMAXCSDZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMAXCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXCSSZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMAXCSSZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMAXSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMAXSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXSDZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMAXSDZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMAXSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMAXSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMAXSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXSSZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMAXSSZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMAXSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMAXSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMINCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINCSDZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMINCSDZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMINCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINCSSZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMINCSSZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMINSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMINSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINSDZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMINSDZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMINSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMINSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMINSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINSSZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMINSSZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMINSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMINSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMULSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMULSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMULSDZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMULSDZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMULSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMULSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMULSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMULSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VMULSSZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMULSSZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VMULSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VMULSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VSUBSDZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VSUBSDZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VSUBSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VSUBSDZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VSUBSSZrr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VSUBSSZrr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VSUBSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VSUBSSZrr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD132SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD132SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD132SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD132SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD132SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD132SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD132SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD132SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD213SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD213SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD213SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD213SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD213SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD213SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD213SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD213SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD231SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD231SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD231SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD231SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMADD231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD231SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD231SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMADD231SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMADD231SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB132SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB132SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB132SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB132SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB132SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB213SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB213SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB213SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB213SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB213SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB231SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB231SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFMSUB231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB231SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFMSUB231SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFMSUB231SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD132SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD132SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD132SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD132SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD132SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD213SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD213SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD213SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD213SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD213SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD231SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD231SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMADD231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD231SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMADD231SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMADD231SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB132SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB132SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB132SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB132SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB132SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB132SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB132SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB213SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB213SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB213SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB213SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB213SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB213SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB213SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB213SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB213SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB231SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB231SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB231SDZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB231SDZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB231SSZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VFNMSUB231SSZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB231SSZr $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ ; CHECK: $xmm16 = VFNMSUB231SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
+ $xmm16 = VFNMSUB231SSZr_Int $xmm16, $xmm1, $xmm2, implicit $mxcsr
; CHECK: VPEXTRBZmr $rdi, 1, $noreg, 0, $noreg, $xmm16, 3
VPEXTRBZmr $rdi, 1, $noreg, 0, $noreg, $xmm16, 3
; CHECK: $eax = VPEXTRBZrr $xmm16, 1
@@ -4526,38 +4526,38 @@ body: |
$xmm16 = VPINSRWZrm $xmm16, $rsi, 1, $noreg, 0, $noreg, 3
; CHECK: $xmm16 = VPINSRWZrr $xmm16, $edi, 5
$xmm16 = VPINSRWZrr $xmm16, $edi, 5
- ; CHECK: $xmm16 = VSQRTSDZm $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VSQRTSDZm $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VSQRTSDZm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VSQRTSDZm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VSQRTSDZr $xmm16, $xmm1
- $xmm16 = VSQRTSDZr $xmm16, $xmm1
- ; CHECK: $xmm16 = VSQRTSDZr_Int $xmm16, $xmm1
- $xmm16 = VSQRTSDZr_Int $xmm16, $xmm1
- ; CHECK: $xmm16 = VSQRTSSZm $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VSQRTSSZm $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VSQRTSSZm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VSQRTSSZm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VSQRTSSZr $xmm16, $xmm1
- $xmm16 = VSQRTSSZr $xmm16, $xmm1
- ; CHECK: $xmm16 = VSQRTSSZr_Int $xmm16, $xmm1
- $xmm16 = VSQRTSSZr_Int $xmm16, $xmm1
- ; CHECK: $rdi = VCVTSD2SI64rm_Int $rdi, 1, $noreg, 0, $noreg
- $rdi = VCVTSD2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $rdi = VCVTSD2SI64Zrr_Int $xmm16
- $rdi = VCVTSD2SI64Zrr_Int $xmm16
- ; CHECK: $edi = VCVTSD2SIrm_Int $rdi, 1, $noreg, 0, $noreg
- $edi = VCVTSD2SIZrm_Int $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $edi = VCVTSD2SIZrr_Int $xmm16
- $edi = VCVTSD2SIZrr_Int $xmm16
- ; CHECK: $xmm16 = VCVTSD2SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTSD2SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTSD2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTSD2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTSD2SSZrr $xmm16, $noreg
- $xmm16 = VCVTSD2SSZrr $xmm16, $noreg
- ; CHECK: $xmm16 = VCVTSD2SSZrr_Int $xmm16, $noreg
- $xmm16 = VCVTSD2SSZrr_Int $xmm16, $noreg
+ ; CHECK: $xmm16 = VSQRTSDZm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VSQRTSDZm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VSQRTSDZm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VSQRTSDZm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VSQRTSDZr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VSQRTSDZr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VSQRTSDZr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VSQRTSDZr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VSQRTSSZm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VSQRTSSZm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VSQRTSSZm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VSQRTSSZm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VSQRTSSZr $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VSQRTSSZr $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $xmm16 = VSQRTSSZr_Int $xmm16, $xmm1, implicit $mxcsr
+ $xmm16 = VSQRTSSZr_Int $xmm16, $xmm1, implicit $mxcsr
+ ; CHECK: $rdi = VCVTSD2SI64rm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $rdi = VCVTSD2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $rdi = VCVTSD2SI64Zrr_Int $xmm16, implicit $mxcsr
+ $rdi = VCVTSD2SI64Zrr_Int $xmm16, implicit $mxcsr
+ ; CHECK: $edi = VCVTSD2SIrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $edi = VCVTSD2SIZrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $edi = VCVTSD2SIZrr_Int $xmm16, implicit $mxcsr
+ $edi = VCVTSD2SIZrr_Int $xmm16, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTSD2SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VCVTSD2SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTSD2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm16 = VCVTSD2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTSD2SSZrr $xmm16, $noreg, implicit $mxcsr
+ $xmm16 = VCVTSD2SSZrr $xmm16, $noreg, implicit $mxcsr
+ ; CHECK: $xmm16 = VCVTSD2SSZrr_Int $xmm16, $noreg, implicit $mxcsr
+ $xmm16 = VCVTSD2SSZrr_Int $xmm16, $noreg, implicit $mxcsr
; CHECK: $xmm16 = VCVTSI2SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
$xmm16 = VCVTSI2SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
; CHECK: $xmm16 = VCVTSI2SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
@@ -4566,78 +4566,78 @@ body: |
$xmm16 = VCVTSI2SDZrr $xmm16, $noreg
; CHECK: $xmm16 = VCVTSI2SDZrr_Int $xmm16, $noreg
$xmm16 = VCVTSI2SDZrr_Int $xmm16, $noreg
- ; CHECK: $xmm16 = VCVTSI2SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTSI2SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTSI2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTSI2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTSI2SSZrr $xmm16, $noreg
- $xmm16 = VCVTSI2SSZrr $xmm16, $noreg
- ; CHECK: $xmm16 = VCVTSI2SSZrr_Int $xmm16, $noreg
- $xmm16 = VCVTSI2SSZrr_Int $xmm16, $noreg
- ; CHECK: $xmm16 = VCVTSI642SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTSI642SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTSI642SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTSI642SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTSI642SDZrr $xmm16, $noreg
- $xmm16 = VCVTSI642SDZrr $xmm16, $noreg
- ; CHECK: $xmm16 = VCVTSI642SDZrr_Int $xmm16, $noreg
- $xmm16 = VCVTSI642SDZrr_Int $xmm16, $noreg
- ; CHECK: $xmm16 = VCVTSI642SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTSI642SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTSI642SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTSI642SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTSI642SSZrr $xmm16, $noreg
- $xmm16 = VCVTSI642SSZrr $xmm16, $noreg
- ; CHECK: $xmm16 = VCVTSI642SSZrr_Int $xmm16, $noreg
- $xmm16 = VCVTSI642SSZrr_Int $xmm16, $noreg
- ; CHECK: $xmm16 = VCVTSS2SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTSS2SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTSS2SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- $xmm16 = VCVTSS2SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $xmm16 = VCVTSS2SDZrr $xmm16, $noreg
- $xmm16 = VCVTSS2SDZrr $xmm16, $noreg
- ; CHECK: $xmm16 = VCVTSS2SDZrr_Int $xmm16, $noreg
- $xmm16 = VCVTSS2SDZrr_Int $xmm16, $noreg
- ; CHECK: $rdi = VCVTSS2SI64rm_Int $rdi, 1, $noreg, 0, $noreg
- $rdi = VCVTSS2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $rdi = VCVTSS2SI64Zrr_Int $xmm16
- $rdi = VCVTSS2SI64Zrr_Int $xmm16
- ; CHECK: $edi = VCVTSS2SIrm_Int $rdi, 1, $noreg, 0, $noreg
- $edi = VCVTSS2SIZrm_Int $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $edi = VCVTSS2SIZrr_Int $xmm16
- $edi = VCVTSS2SIZrr_Int $xmm16
- ; CHECK: $rdi = VCVTTSD2SI64rm $rdi, 1, $noreg, 0, $noreg
- $rdi = VCVTTSD2SI64Zrm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $rdi = VCVTTSD2SI64rm_Int $rdi, 1, $noreg, 0, $noreg
- $rdi = VCVTTSD2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $rdi = VCVTTSD2SI64Zrr $xmm16
- $rdi = VCVTTSD2SI64Zrr $xmm16
- ; CHECK: $rdi = VCVTTSD2SI64Zrr_Int $xmm16
- $rdi = VCVTTSD2SI64Zrr_Int $xmm16
- ; CHECK: $edi = VCVTTSD2SIrm $rdi, 1, $noreg, 0, $noreg
- $edi = VCVTTSD2SIZrm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $edi = VCVTTSD2SIrm_Int $rdi, 1, $noreg, 0, $noreg
- $edi = VCVTTSD2SIZrm_Int $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $edi = VCVTTSD2SIZrr $xmm16
- $edi = VCVTTSD2SIZrr $xmm16
- ; CHECK: $edi = VCVTTSD2SIZrr_Int $xmm16
- $edi = VCVTTSD2SIZrr_Int $xmm16
- ; CHECK: $rdi = VCVTTSS2SI64rm $rdi, 1, $noreg, 0, $noreg
- $rdi = VCVTTSS2SI64Zrm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $rdi = VCVTTSS2SI64rm_Int $rdi, 1, $noreg, 0, $noreg
- $rdi = VCVTTSS2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $rdi = VCVTTSS2SI64Zrr $xmm16
- $rdi = VCVTTSS2SI64Zrr $xmm16
- ; CHECK: $rdi = VCVTTSS2SI64Zrr_Int $xmm16
- $rdi = VCVTTSS2SI64Zrr_Int $xmm16
- ; CHECK: $edi = VCVTTSS2SIrm $rdi, 1, $noreg, 0, $noreg
- $edi = VCVTTSS2SIZrm $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $edi = VCVTTSS2SIrm_Int $rdi, 1, $noreg, 0, $noreg
- $edi = VCVTTSS2SIZrm_Int $rdi, 1, $noreg, 0, $noreg
- ; CHECK: $edi = VCVTTSS2SIZrr $xmm16
- $edi = VCVTTSS2SIZrr $xmm16
- ; CHECK: $edi = VCVTTSS2SIZrr_Int $xmm16
- $edi = VCVTTSS2SIZrr_Int $xmm16
+ ; CHECK: $xmm16 = VCVTSI2SSZrm $xmm16, $rdi, 1,
$noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VCVTSI2SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VCVTSI2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI2SSZrr $xmm16, $noreg, implicit $mxcsr + $xmm16 = VCVTSI2SSZrr $xmm16, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI2SSZrr_Int $xmm16, $noreg, implicit $mxcsr + $xmm16 = VCVTSI2SSZrr_Int $xmm16, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VCVTSI642SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VCVTSI642SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SDZrr $xmm16, $noreg, implicit $mxcsr + $xmm16 = VCVTSI642SDZrr $xmm16, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SDZrr_Int $xmm16, $noreg, implicit $mxcsr + $xmm16 = VCVTSI642SDZrr_Int $xmm16, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VCVTSI642SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VCVTSI642SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SSZrr $xmm16, $noreg, implicit $mxcsr + $xmm16 = VCVTSI642SSZrr $xmm16, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SSZrr_Int $xmm16, $noreg, implicit $mxcsr + $xmm16 = VCVTSI642SSZrr_Int $xmm16, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSS2SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VCVTSS2SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSS2SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VCVTSS2SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSS2SDZrr $xmm16, $noreg, implicit $mxcsr + $xmm16 = VCVTSS2SDZrr $xmm16, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSS2SDZrr_Int $xmm16, $noreg, implicit $mxcsr + $xmm16 = VCVTSS2SDZrr_Int $xmm16, $noreg, implicit $mxcsr + ; CHECK: $rdi = VCVTSS2SI64rm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $rdi = VCVTSS2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $rdi = VCVTSS2SI64Zrr_Int $xmm16, implicit $mxcsr + $rdi = VCVTSS2SI64Zrr_Int $xmm16, implicit $mxcsr + ; CHECK: $edi = VCVTSS2SIrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $edi = VCVTSS2SIZrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $edi = VCVTSS2SIZrr_Int $xmm16, implicit $mxcsr + $edi = VCVTSS2SIZrr_Int $xmm16, implicit $mxcsr + ; CHECK: $rdi = VCVTTSD2SI64rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $rdi = VCVTTSD2SI64Zrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $rdi = VCVTTSD2SI64rm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $rdi = VCVTTSD2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $rdi = VCVTTSD2SI64Zrr $xmm16, implicit $mxcsr + $rdi = VCVTTSD2SI64Zrr $xmm16, implicit $mxcsr + ; CHECK: $rdi = VCVTTSD2SI64Zrr_Int $xmm16, implicit $mxcsr + $rdi = VCVTTSD2SI64Zrr_Int $xmm16, implicit $mxcsr + ; CHECK: $edi = VCVTTSD2SIrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $edi = VCVTTSD2SIZrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $edi = VCVTTSD2SIrm_Int $rdi, 1, $noreg, 0, $noreg, 
implicit $mxcsr + $edi = VCVTTSD2SIZrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $edi = VCVTTSD2SIZrr $xmm16, implicit $mxcsr + $edi = VCVTTSD2SIZrr $xmm16, implicit $mxcsr + ; CHECK: $edi = VCVTTSD2SIZrr_Int $xmm16, implicit $mxcsr + $edi = VCVTTSD2SIZrr_Int $xmm16, implicit $mxcsr + ; CHECK: $rdi = VCVTTSS2SI64rm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $rdi = VCVTTSS2SI64Zrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $rdi = VCVTTSS2SI64rm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $rdi = VCVTTSS2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $rdi = VCVTTSS2SI64Zrr $xmm16, implicit $mxcsr + $rdi = VCVTTSS2SI64Zrr $xmm16, implicit $mxcsr + ; CHECK: $rdi = VCVTTSS2SI64Zrr_Int $xmm16, implicit $mxcsr + $rdi = VCVTTSS2SI64Zrr_Int $xmm16, implicit $mxcsr + ; CHECK: $edi = VCVTTSS2SIrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $edi = VCVTTSS2SIZrm $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $edi = VCVTTSS2SIrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + $edi = VCVTTSS2SIZrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $edi = VCVTTSS2SIZrr $xmm16, implicit $mxcsr + $edi = VCVTTSS2SIZrr $xmm16, implicit $mxcsr + ; CHECK: $edi = VCVTTSS2SIZrr_Int $xmm16, implicit $mxcsr + $edi = VCVTTSS2SIZrr_Int $xmm16, implicit $mxcsr ; CHECK: $xmm16 = VMOV64toSDZrr $rdi $xmm16 = VMOV64toSDZrr $rdi ; CHECK: $xmm16 = VMOVDI2SSZrr $eax @@ -4728,38 +4728,38 @@ body: | VUCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr ; CHECK: VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr - ; CHECK: $xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $rax, 0, $noreg, 15 - $xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $xmm16 = VRNDSCALESDZr $xmm16, $xmm1, 15 - $xmm16 = VRNDSCALESDZr $xmm16, $xmm1, 15 - ; CHECK: $xmm16 = VRNDSCALESSZm $xmm16, $rip, 1, $rax, 0, $noreg, 15 - $xmm16 = VRNDSCALESSZm $xmm16, $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $xmm16 = VRNDSCALESSZr $xmm16, $xmm1, 15 - $xmm16 = VRNDSCALESSZr $xmm16, $xmm1, 15 - ; CHECK: $xmm16 = VRNDSCALESDZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15 - $xmm16 = VRNDSCALESDZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $xmm16 = VRNDSCALESDZr_Int $xmm16, $xmm1, 15 - $xmm16 = VRNDSCALESDZr_Int $xmm16, $xmm1, 15 - ; CHECK: $xmm16 = VRNDSCALESSZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15 - $xmm16 = VRNDSCALESSZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15 - ; CHECK: $xmm16 = VRNDSCALESSZr_Int $xmm16, $xmm1, 15 - $xmm16 = VRNDSCALESSZr_Int $xmm16, $xmm1, 15 - ; CHECK: $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $rax, 0, $noreg, 31 - $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $rax, 0, $noreg, 31 - ; CHECK: $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 31 - $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 31 - ; CHECK: $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $rax, 0, $noreg, 31 - $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $rax, 0, $noreg, 31 - ; CHECK: $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 31 - $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 31 - ; CHECK: $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31 - $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31 - ; CHECK: $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 31 - $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 31 - ; CHECK: $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31 - $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31 - ; CHECK: $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 31 - $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 31 + ; CHECK: 
$xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm16 = VRNDSCALESDZr $xmm16, $xmm1, 15, implicit $mxcsr + $xmm16 = VRNDSCALESDZr $xmm16, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm16 = VRNDSCALESSZm $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $xmm16 = VRNDSCALESSZm $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm16 = VRNDSCALESSZr $xmm16, $xmm1, 15, implicit $mxcsr + $xmm16 = VRNDSCALESSZr $xmm16, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm16 = VRNDSCALESDZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $xmm16 = VRNDSCALESDZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm16 = VRNDSCALESDZr_Int $xmm16, $xmm1, 15, implicit $mxcsr + $xmm16 = VRNDSCALESDZr_Int $xmm16, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm16 = VRNDSCALESSZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + $xmm16 = VRNDSCALESSZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm16 = VRNDSCALESSZr_Int $xmm16, $xmm1, 15, implicit $mxcsr + $xmm16 = VRNDSCALESSZr_Int $xmm16, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + ; CHECK: $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 31, implicit $mxcsr + $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 31, implicit $mxcsr + ; CHECK: $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + ; CHECK: $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 31, implicit $mxcsr + $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 31, implicit $mxcsr + ; CHECK: $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + ; CHECK: $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 31, implicit $mxcsr + $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 31, implicit $mxcsr + ; CHECK: $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + ; CHECK: $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 31, implicit $mxcsr + $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 31, implicit $mxcsr RET 0, $zmm0, $zmm1 ... diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index 58041c29ab64c..011d235c39f62 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -1084,6 +1084,81 @@ entry: ret i64 %result } +; Verify that fptosi(%x) isn't simplified when the rounding mode is +; unknown. +; Verify that no gross errors happen.
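+; On i686 the i128 result is returned indirectly: the caller passes a hidden
+; pointer (held in %esi in the checks below) and the callee pops it on
+; return, hence the 'retl $4'. 64-bit targets instead lower the conversion
+; to a single __fixdfti libcall.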
+define i128 @f20s128(double %x) nounwind strictfp { +; X87-LABEL: f20s128: +; X87: # %bb.0: # %entry +; X87-NEXT: pushl %edi +; X87-NEXT: pushl %esi +; X87-NEXT: subl $36, %esp +; X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fstpl {{[0-9]+}}(%esp) +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: calll __fixdfti +; X87-NEXT: subl $4, %esp +; X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X87-NEXT: movl {{[0-9]+}}(%esp), %edx +; X87-NEXT: movl {{[0-9]+}}(%esp), %edi +; X87-NEXT: movl %edi, 8(%esi) +; X87-NEXT: movl %edx, 12(%esi) +; X87-NEXT: movl %eax, (%esi) +; X87-NEXT: movl %ecx, 4(%esi) +; X87-NEXT: movl %esi, %eax +; X87-NEXT: addl $36, %esp +; X87-NEXT: popl %esi +; X87-NEXT: popl %edi +; X87-NEXT: retl $4 +; +; X86-SSE-LABEL: f20s128: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: subl $36, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, (%esp) +; X86-SSE-NEXT: calll __fixdfti +; X86-SSE-NEXT: subl $4, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: movl %edi, 8(%esi) +; X86-SSE-NEXT: movl %edx, 12(%esi) +; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: movl %ecx, 4(%esi) +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: addl $36, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: retl $4 +; +; SSE-LABEL: f20s128: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: callq __fixdfti +; SSE-NEXT: popq %rcx +; SSE-NEXT: retq +; +; AVX-LABEL: f20s128: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: callq __fixdfti +; AVX-NEXT: popq %rcx +; AVX-NEXT: retq +entry: + %result = call i128 @llvm.experimental.constrained.fptosi.i128.f64(double %x, + metadata !"fpexcept.strict") #0 + ret i128 %result +} + ; Verify that fptoui(%x) isn't simplified when the rounding mode is ; unknown. ; Verify that no gross errors happen. @@ -1348,6 +1423,82 @@ entry: ret i64 %result } + +; Verify that fptoui(%x) isn't simplified when the rounding mode is +; unknown. +; Verify that no gross errors happen. 
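+; Same shape as f20s128 above, except that the unsigned conversion lowers
+; to the __fixunsdfti libcall.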
+define i128 @f20u128(double %x) nounwind strictfp { +; X87-LABEL: f20u128: +; X87: # %bb.0: # %entry +; X87-NEXT: pushl %edi +; X87-NEXT: pushl %esi +; X87-NEXT: subl $36, %esp +; X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fstpl {{[0-9]+}}(%esp) +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: calll __fixunsdfti +; X87-NEXT: subl $4, %esp +; X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X87-NEXT: movl {{[0-9]+}}(%esp), %edx +; X87-NEXT: movl {{[0-9]+}}(%esp), %edi +; X87-NEXT: movl %edi, 8(%esi) +; X87-NEXT: movl %edx, 12(%esi) +; X87-NEXT: movl %eax, (%esi) +; X87-NEXT: movl %ecx, 4(%esi) +; X87-NEXT: movl %esi, %eax +; X87-NEXT: addl $36, %esp +; X87-NEXT: popl %esi +; X87-NEXT: popl %edi +; X87-NEXT: retl $4 +; +; X86-SSE-LABEL: f20u128: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: subl $36, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, (%esp) +; X86-SSE-NEXT: calll __fixunsdfti +; X86-SSE-NEXT: subl $4, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: movl %edi, 8(%esi) +; X86-SSE-NEXT: movl %edx, 12(%esi) +; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: movl %ecx, 4(%esi) +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: addl $36, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: retl $4 +; +; SSE-LABEL: f20u128: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: callq __fixunsdfti +; SSE-NEXT: popq %rcx +; SSE-NEXT: retq +; +; AVX-LABEL: f20u128: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: callq __fixunsdfti +; AVX-NEXT: popq %rcx +; AVX-NEXT: retq +entry: + %result = call i128 @llvm.experimental.constrained.fptoui.i128.f64(double %x, + metadata !"fpexcept.strict") #0 + ret i128 %result +} + ; Verify that round(42.1) isn't simplified when the rounding mode is ; unknown. ; Verify that no gross errors happen. 
@@ -1823,10 +1974,12 @@ declare i8 @llvm.experimental.constrained.fptosi.i8.f64(double, metadata) declare i16 @llvm.experimental.constrained.fptosi.i16.f64(double, metadata) declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) +declare i128 @llvm.experimental.constrained.fptosi.i128.f64(double, metadata) declare i8 @llvm.experimental.constrained.fptoui.i8.f64(double, metadata) declare i16 @llvm.experimental.constrained.fptoui.i16.f64(double, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) +declare i128 @llvm.experimental.constrained.fptoui.i128.f64(double, metadata) declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) declare i32 @llvm.experimental.constrained.lrint.i32.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar.ll b/llvm/test/CodeGen/X86/fp-strict-scalar.ll index 724095e8aca39..a61f195735ef9 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X87 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X87 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) @@ -16,7 +16,7 @@ declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, me declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) -declare float @llvm.experimental.constrained.fptrunc.f64.f32(double, metadata, metadata) +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) @@ -70,8 +70,8 @@ define double @fadd_f64(double %a, double %b) nounwind strictfp { ret double %ret } -define float @fadd_fsub_f32(float %a, float %b) nounwind strictfp { -; SSE-X86-LABEL: fadd_fsub_f32: +define float @fadd_f32(float %a, float %b) nounwind strictfp { +; SSE-X86-LABEL: fadd_f32: ; SSE-X86: # %bb.0: ; SSE-X86-NEXT: pushl %eax ; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -81,12 +81,12 @@ define float @fadd_fsub_f32(float %a, float %b) nounwind strictfp { ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: retl ; -; SSE-X64-LABEL: fadd_fsub_f32: +; SSE-X64-LABEL: fadd_f32: ; SSE-X64: # %bb.0: ; SSE-X64-NEXT: addss %xmm1, %xmm0 ; SSE-X64-NEXT: retq ; -; AVX-X86-LABEL: fadd_fsub_f32: +; AVX-X86-LABEL: fadd_f32: ; AVX-X86: # %bb.0: ; 
AVX-X86-NEXT: pushl %eax ; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -96,12 +96,12 @@ define float @fadd_fsub_f32(float %a, float %b) nounwind strictfp { ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: retl ; -; AVX-X64-LABEL: fadd_fsub_f32: +; AVX-X64-LABEL: fadd_f32: ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX-X64-NEXT: retq ; -; X87-LABEL: fadd_fsub_f32: +; X87-LABEL: fadd_f32: ; X87: # %bb.0: ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fadds {{[0-9]+}}(%esp) @@ -480,7 +480,7 @@ define void @fptrunc_double_to_f32(double* %val, float *%ret) nounwind strictfp ; X87-NEXT: popl %eax ; X87-NEXT: retl %1 = load double, double* %val, align 8 - %res = call float @llvm.experimental.constrained.fptrunc.f64.f32(double %1, + %res = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %1, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 store float %res, float* %ret, align 4 diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll new file mode 100644 index 0000000000000..2173ff369a927 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll @@ -0,0 +1,569 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+sse | FileCheck %s --check-prefixes=X64,X64-SSE +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck %s --check-prefixes=X64,X64-SSE +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX +; RUN: llc < %s -disable-strictnode-mutation -mtriple=i686-linux-gnu -mattr=-sse | FileCheck %s --check-prefixes=X86 + +; Check soft floating point conversion function calls. 
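+; fp128 has no native x86 support, so each conversion below is expected to
+; remain a libcall: __extend{s,d,x}ftf2 for fpext, __trunctf{s,d,x}f2 for
+; fptrunc, and the __fixtf*/__fixunstf* family for the fp-to-integer cases.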
+ +@vf32 = common global float 0.000000e+00, align 4 +@vf64 = common global double 0.000000e+00, align 8 +@vf80 = common global x86_fp80 0xK00000000000000000000, align 8 +@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16 + +define void @TestFPExtF32_F128() nounwind strictfp { +; X64-SSE-LABEL: TestFPExtF32_F128: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-SSE-NEXT: callq __extendsftf2 +; X64-SSE-NEXT: movaps %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPExtF32_F128: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-AVX-NEXT: callq __extendsftf2 +; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPExtF32_F128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds vf32 +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: calll __extendsftf2 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, vf128+8 +; X86-NEXT: movl %edx, vf128+12 +; X86-NEXT: movl %eax, vf128 +; X86-NEXT: movl %ecx, vf128+4 +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl +entry: + %0 = load float, float* @vf32, align 4 + %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %0, metadata !"fpexcept.strict") #0 + store fp128 %conv, fp128* @vf128, align 16 + ret void +} + +define void @TestFPExtF64_F128() nounwind strictfp { +; X64-SSE-LABEL: TestFPExtF64_F128: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-SSE-NEXT: callq __extenddftf2 +; X64-SSE-NEXT: movaps %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPExtF64_F128: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-AVX-NEXT: callq __extenddftf2 +; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPExtF64_F128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $40, %esp +; X86-NEXT: fldl vf64 +; X86-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: calll __extenddftf2 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, vf128+8 +; X86-NEXT: movl %edx, vf128+12 +; X86-NEXT: movl %eax, vf128 +; X86-NEXT: movl %ecx, vf128+4 +; X86-NEXT: addl $40, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl +entry: + %0 = load double, double* @vf64, align 8 + %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %0, metadata !"fpexcept.strict") #0 + store fp128 %conv, fp128* @vf128, align 16 + ret void +} + +define void @TestFPExtF80_F128() nounwind strictfp { +; X64-SSE-LABEL: TestFPExtF80_F128: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: subq $24, %rsp +; X64-SSE-NEXT: fldt {{.*}}(%rip) +; X64-SSE-NEXT: fstpt (%rsp) +; X64-SSE-NEXT: callq __extendxftf2 +; X64-SSE-NEXT: movaps %xmm0, 
{{.*}}(%rip) +; X64-SSE-NEXT: addq $24, %rsp +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPExtF80_F128: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: subq $24, %rsp +; X64-AVX-NEXT: fldt {{.*}}(%rip) +; X64-AVX-NEXT: fstpt (%rsp) +; X64-AVX-NEXT: callq __extendxftf2 +; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: addq $24, %rsp +; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPExtF80_F128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $40, %esp +; X86-NEXT: fldt vf80 +; X86-NEXT: fstpt {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: calll __extendxftf2 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, vf128+8 +; X86-NEXT: movl %edx, vf128+12 +; X86-NEXT: movl %eax, vf128 +; X86-NEXT: movl %ecx, vf128+4 +; X86-NEXT: addl $40, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl +entry: + %0 = load x86_fp80, x86_fp80* @vf80, align 8 + %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80 %0, metadata !"fpexcept.strict") #0 + store fp128 %conv, fp128* @vf128, align 16 + ret void +} + +define void @TestFPTruncF128_F32() nounwind strictfp { +; X64-SSE-LABEL: TestFPTruncF128_F32: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: callq __trunctfsf2 +; X64-SSE-NEXT: movss %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPTruncF128_F32: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0 +; X64-AVX-NEXT: callq __trunctfsf2 +; X64-AVX-NEXT: vmovss %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPTruncF128_F32: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl vf128+12 +; X86-NEXT: pushl vf128+8 +; X86-NEXT: pushl vf128+4 +; X86-NEXT: pushl vf128 +; X86-NEXT: calll __trunctfsf2 +; X86-NEXT: addl $16, %esp +; X86-NEXT: fstps vf32 +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = call float @llvm.experimental.constrained.fptrunc.f32.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + store float %conv, float* @vf32, align 4 + ret void +} + +define void @TestFPTruncF128_F64() nounwind strictfp { +; X64-SSE-LABEL: TestFPTruncF128_F64: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: callq __trunctfdf2 +; X64-SSE-NEXT: movsd %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPTruncF128_F64: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0 +; X64-AVX-NEXT: callq __trunctfdf2 +; X64-AVX-NEXT: vmovsd %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPTruncF128_F64: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl vf128+12 +; X86-NEXT: pushl vf128+8 +; X86-NEXT: pushl vf128+4 +; X86-NEXT: pushl vf128 +; X86-NEXT: calll __trunctfdf2 +; X86-NEXT: addl $16, %esp +; X86-NEXT: fstpl vf64 +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = call double @llvm.experimental.constrained.fptrunc.f64.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") 
#0 + store double %conv, double* @vf64, align 8 + ret void +} + +define void @TestFPTruncF128_F80() nounwind strictfp { +; X64-SSE-LABEL: TestFPTruncF128_F80: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: callq __trunctfxf2 +; X64-SSE-NEXT: fstpt {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPTruncF128_F80: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0 +; X64-AVX-NEXT: callq __trunctfxf2 +; X64-AVX-NEXT: fstpt {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPTruncF128_F80: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl vf128+12 +; X86-NEXT: pushl vf128+8 +; X86-NEXT: pushl vf128+4 +; X86-NEXT: pushl vf128 +; X86-NEXT: calll __trunctfxf2 +; X86-NEXT: addl $16, %esp +; X86-NEXT: fstpt vf80 +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = call x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + store x86_fp80 %conv, x86_fp80* @vf80, align 8 + ret void +} + +define i8 @fptosi_i8(fp128 %x) nounwind strictfp { +; X64-LABEL: fptosi_i8: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfsi +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; X86-LABEL: fptosi_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixtfsi +; X86-NEXT: addl $16, %esp +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +entry: + %conv = call i8 @llvm.experimental.constrained.fptosi.i8.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i8 %conv +} + +define i16 @fptosi_i16(fp128 %x) nounwind strictfp { +; X64-LABEL: fptosi_i16: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfsi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; X86-LABEL: fptosi_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixtfsi +; X86-NEXT: addl $16, %esp +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +entry: + %conv = call i16 @llvm.experimental.constrained.fptosi.i16.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i16 %conv +} + +define i32 @fptosi_i32(fp128 %x) nounwind strictfp { +; X64-LABEL: fptosi_i32: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfsi +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; X86-LABEL: fptosi_i32: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixtfsi +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl +entry: + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @fptosi_i64(fp128 %x) nounwind strictfp { +; X64-LABEL: fptosi_i64: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: 
callq __fixtfdi +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; X86-LABEL: fptosi_i64: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixtfdi +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl +entry: + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i128 @fptosi_i128(fp128 %x) nounwind strictfp { +; X64-LABEL: fptosi_i128: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfti +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; X86-LABEL: fptosi_i128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __fixtfti +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 +entry: + %conv = call i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +define i8 @fptoui_i8(fp128 %x) nounwind strictfp { +; X64-LABEL: fptoui_i8: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfsi +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; X86-LABEL: fptoui_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixunstfsi +; X86-NEXT: addl $16, %esp +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +entry: + %conv = call i8 @llvm.experimental.constrained.fptoui.i8.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i8 %conv +} + +define i16 @fptoui_i16(fp128 %x) nounwind strictfp { +; X64-LABEL: fptoui_i16: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfsi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; X86-LABEL: fptoui_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixunstfsi +; X86-NEXT: addl $16, %esp +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +entry: + %conv = call i16 @llvm.experimental.constrained.fptoui.i16.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i16 %conv +} + +define i32 @fptoui_i32(fp128 %x) nounwind strictfp { +; X64-LABEL: fptoui_i32: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixunstfsi +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; X86-LABEL: fptoui_i32: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: 
pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixunstfsi +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl +entry: + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @fptoui_i64(fp128 %x) nounwind strictfp { +; X64-LABEL: fptoui_i64: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixunstfdi +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; X86-LABEL: fptoui_i64: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixunstfdi +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl +entry: + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i128 @fptoui_i128(fp128 %x) nounwind strictfp { +; X64-LABEL: fptoui_i128: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixunstfti +; X64-NEXT: popq %rcx +; X64-NEXT: retq +; +; X86-LABEL: fptoui_i128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __fixunstfti +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 +entry: + %conv = call i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +attributes #0 = { strictfp } + +declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata) +declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata) +declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata) +declare fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80, metadata) +declare i8 @llvm.experimental.constrained.fptosi.i8.f128(fp128, metadata) +declare i16 @llvm.experimental.constrained.fptosi.i16.f128(fp128, metadata) +declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) +declare i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128, metadata) +declare i8 @llvm.experimental.constrained.fptoui.i8.f128(fp128, metadata) +declare i16 @llvm.experimental.constrained.fptoui.i16.f128(fp128, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) +declare i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128, metadata) diff --git 
a/llvm/test/CodeGen/X86/fp128-i128.ll b/llvm/test/CodeGen/X86/fp128-i128.ll index c47b92f04e49f..d99ce45f050ca 100644 --- a/llvm/test/CodeGen/X86/fp128-i128.ll +++ b/llvm/test/CodeGen/X86/fp128-i128.ll @@ -496,9 +496,8 @@ define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result, ; AVX-NEXT: testl %ebp, %ebp ; AVX-NEXT: jle .LBB10_1 ; AVX-NEXT: # %bb.2: # %if.then -; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm2 ; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX-NEXT: vmovaps %xmm1, %xmm2 ; AVX-NEXT: jmp .LBB10_3 ; AVX-NEXT: .LBB10_1: ; AVX-NEXT: vmovaps (%rsp), %xmm2 # 16-byte Reload diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index a37adcb107c39..05b129ceeeaad 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -1,10 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx \ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android \ ; RUN: -enable-legalize-types-checking \ ; RUN: -disable-strictnode-mutation | FileCheck %s -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx \ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu \ ; RUN: -enable-legalize-types-checking \ ; RUN: -disable-strictnode-mutation | FileCheck %s +; RUN: llc < %s -O2 -mtriple=i686-linux-gnu -mattr=+sse2 \ +; RUN: -enable-legalize-types-checking \ +; RUN: -disable-strictnode-mutation | FileCheck %s --check-prefix=X86 ; Check all soft floating point library function calls. @@ -15,6 +18,39 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq __addtf3 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: add: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __addtf3 +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %add = call fp128 @llvm.experimental.constrained.fadd.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %add @@ -27,6 +63,39 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq __subtf3 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: sub: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: 
pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __subtf3 +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %sub = call fp128 @llvm.experimental.constrained.fsub.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %sub @@ -39,6 +108,39 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq __multf3 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: mul: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __multf3 +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %mul = call fp128 @llvm.experimental.constrained.fmul.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %mul @@ -51,6 +153,39 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq __divtf3 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: div: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __divtf3 +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %div = call fp128 @llvm.experimental.constrained.fdiv.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %div @@ -63,6 +198,43 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp { ; CHECK-NEXT: callq fmal ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: fma: 
+; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll fmal +; X86-NEXT: addl $60, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %fma = call fp128 @llvm.experimental.constrained.fma.f128(fp128 %x, fp128 %y, fp128 %z, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %fma @@ -75,6 +247,39 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq fmodl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: frem: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll fmodl +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %div = call fp128 @llvm.experimental.constrained.frem.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %div @@ -87,6 +292,35 @@ define fp128 @ceil(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq ceill ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: ceil: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll ceill +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: 
popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %ceil = call fp128 @llvm.experimental.constrained.ceil.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %ceil
@@ -99,6 +333,35 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq cosl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: cos:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll cosl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %cos = call fp128 @llvm.experimental.constrained.cos.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %cos
@@ -111,6 +374,35 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq expl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: exp:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll expl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %exp = call fp128 @llvm.experimental.constrained.exp.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %exp
@@ -123,6 +415,35 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq exp2l
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: exp2:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll exp2l
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %exp2 = call fp128 @llvm.experimental.constrained.exp2.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %exp2
@@ -135,6 +456,35 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq floorl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: floor:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll floorl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %floor = call fp128 @llvm.experimental.constrained.floor.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %floor
@@ -147,6 +497,35 @@ define fp128 @log(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq logl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: log:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll logl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %log = call fp128 @llvm.experimental.constrained.log.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %log
@@ -159,6 +538,35 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq log10l
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: log10:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll log10l
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %log10 = call fp128 @llvm.experimental.constrained.log10.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %log10
@@ -171,6 +579,35 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq log2l
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: log2:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll log2l
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %log2 = call fp128 @llvm.experimental.constrained.log2.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %log2
@@ -183,6 +620,39 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; CHECK-NEXT: callq fmaxl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: maxnum:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll fmaxl
+; X86-NEXT: addl $44, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %maxnum = call fp128 @llvm.experimental.constrained.maxnum.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %maxnum
@@ -195,6 +665,39 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; CHECK-NEXT: callq fminl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: minnum:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll fminl
+; X86-NEXT: addl $44, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %minnum = call fp128 @llvm.experimental.constrained.minnum.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %minnum
@@ -207,6 +710,35 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq nearbyintl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: nearbyint:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll nearbyintl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %nearbyint = call fp128 @llvm.experimental.constrained.nearbyint.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %nearbyint
@@ -219,6 +751,39 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
 ; CHECK-NEXT: callq powl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: pow:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll powl
+; X86-NEXT: addl $44, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %pow = call fp128 @llvm.experimental.constrained.pow.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %pow
@@ -231,6 +796,36 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
 ; CHECK-NEXT: callq __powitf2
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: powi:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __powitf2
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %powi = call fp128 @llvm.experimental.constrained.powi.f128(fp128 %x, i32 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %powi
@@ -243,6 +838,35 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq rintl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: rint:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll rintl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %rint = call fp128 @llvm.experimental.constrained.rint.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %rint
@@ -255,6 +879,35 @@ define fp128 @round(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq roundl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll roundl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %round = call fp128 @llvm.experimental.constrained.round.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %round
@@ -267,6 +920,35 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq sinl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: sin:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll sinl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %sin = call fp128 @llvm.experimental.constrained.sin.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %sin
@@ -279,6 +961,35 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq sqrtl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: sqrt:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll sqrtl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %sqrt = call fp128 @llvm.experimental.constrained.sqrt.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %sqrt
@@ -291,11 +1002,132 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
 ; CHECK-NEXT: callq truncl
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: retq
+;
+; X86-LABEL: trunc:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $20, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll truncl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, 8(%esi)
+; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
 entry:
 %trunc = call fp128 @llvm.experimental.constrained.trunc.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
 ret fp128 %trunc
 }
 
+define i32 @lrint(fp128 %x) nounwind strictfp {
+; CHECK-LABEL: lrint:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq lrintl
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+;
+; X86-LABEL: lrint:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll lrintl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: retl
+entry:
+ %rint = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret i32 %rint
+}
+
+define i64 @llrint(fp128 %x) nounwind strictfp {
+; CHECK-LABEL: llrint:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq llrintl
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+;
+; X86-LABEL: llrint:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll llrintl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: retl
+entry:
+ %rint = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret i64 %rint
+}
+
+define i32 @lround(fp128 %x) nounwind strictfp {
+; CHECK-LABEL: lround:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq lroundl
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+;
+; X86-LABEL: lround:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll lroundl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: retl
+entry:
+ %round = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0
+ ret i32 %round
+}
+
+define i64 @llround(fp128 %x) nounwind strictfp {
+; CHECK-LABEL: llround:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq llroundl
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+;
+; X86-LABEL: llround:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll llroundl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: retl
+entry:
+ %round = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0
+ ret i64 %round
+}
+
 attributes #0 = { strictfp }
 
 declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata)
@@ -322,3 +1154,7 @@ declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadat
 declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.i32.f128(fp128, metadata, metadata)
+declare i64 @llvm.experimental.constrained.llrint.i64.f128(fp128, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lround.i32.f128(fp128, metadata)
+declare i64 @llvm.experimental.constrained.llround.i64.f128(fp128, metadata)
diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
index 1fc5d0196190d..e4fcf54e6950a 100644
--- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
+++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X64
 
 declare x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80, x86_fp80, metadata, metadata)
 declare x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80, x86_fp80, metadata, metadata)
@@ -9,8 +9,8 @@ declare x86_fp80 @llvm.experimental.constrained.fdiv.x86_fp80(x86_fp80, x86_fp80
 declare x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float, metadata)
 declare x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double, metadata)
 declare x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80, metadata, metadata)
-declare float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80, metadata, metadata)
-declare double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80, metadata, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.x86_fp80(x86_fp80, metadata, metadata)
+declare double @llvm.experimental.constrained.fptrunc.f64.x86_fp80(x86_fp80, metadata, metadata)
 
 define x86_fp80 @fadd_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
 ; X86-LABEL: fadd_fp80:
@@ -92,129 +92,102 @@ define x86_fp80 @fdiv_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
 ret x86_fp80 %ret
 }
 
-define void @fpext_f32_to_fp80(float* %val, x86_fp80* %ret) nounwind strictfp {
+define x86_fp80 @fpext_f32_to_fp80(float %a) nounwind strictfp {
 ; X86-LABEL: fpext_f32_to_fp80:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: flds (%ecx)
-; X86-NEXT: fstpt (%eax)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: fpext_f32_to_fp80:
 ; X64: # %bb.0:
-; X64-NEXT: flds (%rdi)
-; X64-NEXT: fstpt (%rsi)
+; X64-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: flds -{{[0-9]+}}(%rsp)
 ; X64-NEXT: retq
- %1 = load float, float* %val, align 4
- %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %1,
+ %ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %a,
 metadata !"fpexcept.strict") #0
- store x86_fp80 %res, x86_fp80* %ret, align 16
- ret void
+ ret x86_fp80 %ret
 }
 
-define void @fpext_f64_to_fp80(double* %val, x86_fp80* %ret) nounwind strictfp {
+define x86_fp80 @fpext_f64_to_fp80(double %a) nounwind strictfp {
 ; X86-LABEL: fpext_f64_to_fp80:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: fldl (%ecx)
-; X86-NEXT: fstpt (%eax)
+; X86-NEXT: fldl {{[0-9]+}}(%esp)
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: fpext_f64_to_fp80:
 ; X64: # %bb.0:
-; X64-NEXT: fldl (%rdi)
-; X64-NEXT: fstpt (%rsi)
+; X64-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: fldl -{{[0-9]+}}(%rsp)
 ; X64-NEXT: retq
- %1 = load double, double* %val, align 8
- %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %1,
+ %ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %a,
 metadata !"fpexcept.strict") #0
- store x86_fp80 %res, x86_fp80* %ret, align 16
- ret void
+ ret x86_fp80 %ret
 }
 
-define void @fptrunc_fp80_to_f32(x86_fp80* %val, float *%ret) nounwind strictfp {
+define float @fptrunc_fp80_to_f32(x86_fp80 %a) nounwind strictfp {
 ; X86-LABEL: fptrunc_fp80_to_f32:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: fldt (%ecx)
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
 ; X86-NEXT: fstps (%esp)
 ; X86-NEXT: flds (%esp)
-; X86-NEXT: fstps (%eax)
 ; X86-NEXT: popl %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: fptrunc_fp80_to_f32:
 ; X64: # %bb.0:
-; X64-NEXT: fldt (%rdi)
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT: fstps -{{[0-9]+}}(%rsp)
-; X64-NEXT: flds -{{[0-9]+}}(%rsp)
-; X64-NEXT: fstps (%rsi)
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT: retq
- %1 = load x86_fp80, x86_fp80* %val, align 16
- %res = call float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80 %1,
+ %ret = call float @llvm.experimental.constrained.fptrunc.f32.x86_fp80(x86_fp80 %a,
 metadata !"round.dynamic", metadata !"fpexcept.strict") #0
- store float %res, float* %ret, align 4
- ret void
+ ret float %ret
 }
 
-define void @fptrunc_fp80_to_f64(x86_fp80* %val, double* %ret) nounwind strictfp {
+define double @fptrunc_fp80_to_f64(x86_fp80 %a) nounwind strictfp {
 ; X86-LABEL: fptrunc_fp80_to_f64:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
 ; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: andl $-8, %esp
 ; X86-NEXT: subl $8, %esp
-; X86-NEXT: movl 12(%ebp), %eax
-; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: fldt (%ecx)
+; X86-NEXT: fldt 8(%ebp)
 ; X86-NEXT: fstpl (%esp)
 ; X86-NEXT: fldl (%esp)
-; X86-NEXT: fstpl (%eax)
 ; X86-NEXT: movl %ebp, %esp
 ; X86-NEXT: popl %ebp
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: fptrunc_fp80_to_f64:
 ; X64: # %bb.0:
-; X64-NEXT: fldt (%rdi)
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT: fstpl -{{[0-9]+}}(%rsp)
-; X64-NEXT: fldl -{{[0-9]+}}(%rsp)
-; X64-NEXT: fstpl (%rsi)
+; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
 ; X64-NEXT: retq
- %1 = load x86_fp80, x86_fp80* %val, align 16
- %res = call double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80 %1,
+ %ret = call double @llvm.experimental.constrained.fptrunc.f64.x86_fp80(x86_fp80 %a,
 metadata !"round.dynamic", metadata !"fpexcept.strict") #0
- store double %res, double* %ret, align 8
- ret void
+ ret double %ret
 }
 
-define void @fsqrt_fp80(x86_fp80* %a) nounwind strictfp {
+define x86_fp80 @fsqrt_fp80(x86_fp80 %a) nounwind strictfp {
 ; X86-LABEL: fsqrt_fp80:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: fldt (%eax)
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
 ; X86-NEXT: fsqrt
-; X86-NEXT: fstpt (%eax)
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: fsqrt_fp80:
 ; X64: # %bb.0:
-; X64-NEXT: fldt (%rdi)
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT: fsqrt
-; X64-NEXT: fstpt (%rdi)
 ; X64-NEXT: retq
- %1 = load x86_fp80, x86_fp80* %a, align 16
- %res = call x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80 %1,
+ %ret = call x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80 %a,
 metadata !"round.dynamic", metadata !"fpexcept.strict") #0
- store x86_fp80 %res, x86_fp80* %a, align 16
- ret void
+ ret x86_fp80 %ret
 }
 
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index a655c5804e1bc..009f2420575f9 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -279,8 +279,7 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-SLOW-NEXT: orl %edi, %edx
 ; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
 ; X86-SLOW-NEXT: .LBB4_2:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X86-SLOW-NEXT: movl %ebx, %ecx
 ; X86-SLOW-NEXT: shrl %cl, %edx
 ; X86-SLOW-NEXT: movb %bl, %ah
diff --git a/llvm/test/CodeGen/X86/ftrunc.ll b/llvm/test/CodeGen/X86/ftrunc.ll
index 448c21d93ac8e..92118100bba84 100644
--- a/llvm/test/CodeGen/X86/ftrunc.ll
+++ b/llvm/test/CodeGen/X86/ftrunc.ll
@@ -289,12 +289,12 @@ define <2 x double> @trunc_signed_v2f64(<2 x double> %x) #0 {
 ; SSE2-LABEL: trunc_signed_v2f64:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE2-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE2-NEXT: xorps %xmm0, %xmm0
 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0
 ; SSE2-NEXT: cvtsi2sd %rcx, %xmm1
-; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: trunc_signed_v2f64:
@@ -315,20 +315,20 @@ define <4 x double> @trunc_signed_v4f64(<4 x double> %x) #0 {
 ; SSE2-LABEL: trunc_signed_v4f64:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: cvttsd2si %xmm1, %rax
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE2-NEXT: cvttsd2si %xmm1, %rcx
 ; SSE2-NEXT: cvttsd2si %xmm0, %rdx
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE2-NEXT: cvttsd2si %xmm0, %rsi
 ; SSE2-NEXT: xorps %xmm0, %xmm0
 ; SSE2-NEXT: cvtsi2sd %rdx, %xmm0
 ; SSE2-NEXT: xorps %xmm1, %xmm1
 ; SSE2-NEXT: cvtsi2sd %rsi, %xmm1
-; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT: xorps %xmm1, %xmm1
 ; SSE2-NEXT: cvtsi2sd %rax, %xmm1
 ; SSE2-NEXT: cvtsi2sd %rcx, %xmm2
-; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: trunc_signed_v4f64:
diff --git a/llvm/test/CodeGen/X86/i128-mul.ll b/llvm/test/CodeGen/X86/i128-mul.ll
index 9f9636361a50a..e40f10a67dd1b 100644
--- a/llvm/test/CodeGen/X86/i128-mul.ll
+++ b/llvm/test/CodeGen/X86/i128-mul.ll
@@ -88,9 +88,8 @@ define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind
 ; X86-NEXT: movl 4(%eax,%ebp,8), %ecx
 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: mull %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull %edi
 ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: movl %ecx, %eax
diff --git a/llvm/test/CodeGen/X86/implicit-null-checks.mir b/llvm/test/CodeGen/X86/implicit-null-checks.mir
index e6147f56ed777..e1ac01a829730 100644
--- a/llvm/test/CodeGen/X86/implicit-null-checks.mir
+++ b/llvm/test/CodeGen/X86/implicit-null-checks.mir
@@ -828,6 +828,7 @@ name: inc_store_with_dep
 # CHECK-NEXT: $noreg = FAULTING_OP 3, %bb.2, {{[0-9]+}}, $rdi, 1, $noreg, 16, $noreg, $esi
 # CHECK-NEXT: JMP_1 %bb.1
 # CHECK: bb.1.not_null
+# CHECK-NOT: liveins: {{.*}} $eflags
 
 alignment: 16
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
index 8d43a1b73234c..980956bdaa88c 100644
--- a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -321,7 +321,7 @@ define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" {
 ;
 ; ALL-LABEL: test_zext_cmp11:
 ; ALL: # %bb.0: # %entry
-; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; ALL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
 ; ALL-NEXT: vucomisd %xmm2, %xmm0
 ; ALL-NEXT: sete %al
 ; ALL-NEXT: vucomisd %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
new file mode 100644
index 0000000000000..27cd7b98fa60c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
@@ -0,0 +1,128 @@
+; RUN: llc < %s | FileCheck --check-prefixes=CHECK %s
+; RUN: llc -O0 < %s | FileCheck --check-prefixes=CHECK %s
+
+; Source to regenerate:
+; struct Foo {
+; int * __ptr32 p32;
+; int * __ptr64 p64;
+; __attribute__((address_space(9))) int *p_other;
+; };
+; void use_foo(Foo *f);
+; void test_sign_ext(Foo *f, int * __ptr32 __sptr i) {
+; f->p64 = i;
+; use_foo(f);
+; }
+; void test_zero_ext(Foo *f, int * __ptr32 __uptr i) {
+; f->p64 = i;
+; use_foo(f);
+; }
+; void test_trunc(Foo *f, int * __ptr64 i) {
+; f->p32 = i;
+; use_foo(f);
+; }
+; void test_noop1(Foo *f, int * __ptr32 i) {
+; f->p32 = i;
+; use_foo(f);
+; }
+; void test_noop2(Foo *f, int * __ptr64 i) {
+; f->p64 = i;
+; use_foo(f);
+; }
+; void test_null_arg(Foo *f, int * __ptr32 i) {
+; test_noop1(f, 0);
+; }
+; void test_unrecognized(Foo *f, __attribute__((address_space(14))) int *i) {
+; f->p32 = (int * __ptr32)i;
+; use_foo(f);
+; }
+;
+; $ clang -cc1 -triple x86_64-windows-msvc -fms-extensions -O2 -S t.cpp
+
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-windows-msvc"
+
+%struct.Foo = type { i32 addrspace(270)*, i32*, i32 addrspace(9)* }
+declare dso_local void @use_foo(%struct.Foo*)
+
+define dso_local void @test_sign_ext(%struct.Foo* %f, i32 addrspace(270)* %i) {
+; CHECK-LABEL: test_sign_ext
+; CHECK: movslq %edx, %rax
+entry:
+ %0 = addrspacecast i32 addrspace(270)* %i to i32*
+ %p64 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i64 0, i32 1
+ store i32* %0, i32** %p64, align 8
+ tail call void @use_foo(%struct.Foo* %f)
+ ret void
+}
+
+define dso_local void @test_zero_ext(%struct.Foo* %f, i32 addrspace(271)* %i) {
+; CHECK-LABEL: test_zero_ext
+; CHECK: movl %edx, %eax
+entry:
+ %0 = addrspacecast i32 addrspace(271)* %i to i32*
+ %p64 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i64 0, i32 1
+ store i32* %0, i32** %p64, align 8
+ tail call void @use_foo(%struct.Foo* %f)
+ ret void
+}
+
+define dso_local void @test_trunc(%struct.Foo* %f, i32* %i) {
+; CHECK-LABEL: test_trunc
+; CHECK: movl %edx, (%rcx)
+entry:
+ %0 = addrspacecast i32* %i to i32 addrspace(270)*
+ %p32 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i64 0, i32 0
+ store i32 addrspace(270)* %0, i32 addrspace(270)** %p32, align 8
+ tail call void @use_foo(%struct.Foo* %f)
+ ret void
+}
+
+define dso_local void @test_noop1(%struct.Foo* %f, i32 addrspace(270)* %i) {
+; CHECK-LABEL: test_noop1
+; CHECK: movl %edx, (%rcx)
+entry:
+ %p32 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i64 0, i32 0
+ store i32 addrspace(270)* %i, i32 addrspace(270)** %p32, align 8
+ tail call void @use_foo(%struct.Foo* %f)
+ ret void
+}
+
+define dso_local void @test_noop2(%struct.Foo* %f, i32* %i) {
+; CHECK-LABEL: test_noop2
+; CHECK: movq %rdx, 8(%rcx)
+entry:
+ %p64 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i64 0, i32 1
+ store i32* %i, i32** %p64, align 8
+ tail call void @use_foo(%struct.Foo* %f)
+ ret void
+}
+
+; Test that null can be passed as a 32-bit pointer.
+define dso_local void @test_null_arg(%struct.Foo* %f) {
+entry:
+ call void @test_noop1(%struct.Foo* %f, i32 addrspace(270)* null)
+ ret void
+}
+
+; Test casts between unrecognized address spaces.
+define void @test_unrecognized(%struct.Foo* %f, i32 addrspace(14)* %i) {
+; CHECK-LABEL: test_unrecognized
+; CHECK: movl %edx, (%rcx)
+entry:
+ %0 = addrspacecast i32 addrspace(14)* %i to i32 addrspace(270)*
+ %p32 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i64 0, i32 0
+ store i32 addrspace(270)* %0, i32 addrspace(270)** %p32, align 8
+ tail call void @use_foo(%struct.Foo* %f)
+ ret void
+}
+
+define void @test_unrecognized2(%struct.Foo* %f, i32 addrspace(271)* %i) {
+; CHECK-LABEL: test_unrecognized2
+; CHECK: movl %edx, %eax
+entry:
+ %0 = addrspacecast i32 addrspace(271)* %i to i32 addrspace(9)*
+ %p32 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i64 0, i32 2
+ store i32 addrspace(9)* %0, i32 addrspace(9)** %p32, align 8
+ tail call void @use_foo(%struct.Foo* %f)
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/mmx-reg-usage.ll b/llvm/test/CodeGen/X86/mmx-reg-usage.ll
deleted file mode 100644
index a8d88c2e9f8e2..0000000000000
--- a/llvm/test/CodeGen/X86/mmx-reg-usage.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc -march=x86-64 -mattr=+mmx -stop-after finalize-isel -o - %s | FileCheck %s
-; This test ensures that the MXCSR is implicitly used by MMX FP instructions.
-
-define x86_mmx @mxcsr_usage(<4 x float> %a0) {
- %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0)
- %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %1)
- %3 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %2)
- %4 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %3)
- %5 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %4)
- ret x86_mmx %5
-}
-
-declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>)
-declare<4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx)
-declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>)
-declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx)
-declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>)
-
-; CHECK: MMX_CVTPS2PIirr %{{[0-9]}}, implicit $mxcsr
-; CHECK: MMX_CVTPI2PSirr %{{[0-9]}}, killed %{{[0-9]}}, implicit $mxcsr
-; CHECK: MMX_CVTTPS2PIirr killed %{{[0-9]}}, implicit $mxcsr
-; CHECK: MMX_CVTPI2PDirr killed %{{[0-9]$}}
-; CHECK: MMX_CVTPD2PIirr killed %{{[0-9]}}, implicit $mxcsr
diff --git a/llvm/test/CodeGen/X86/mul-i512.ll b/llvm/test/CodeGen/X86/mul-i512.ll
index 40f6b09288e05..a5050467ac1af 100644
--- a/llvm/test/CodeGen/X86/mul-i512.ll
+++ b/llvm/test/CodeGen/X86/mul-i512.ll
@@ -153,9 +153,8 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
 ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT: adcl $0, %edx
 ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: movl 8(%esi), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl 8(%ecx), %ebx
 ; X32-NEXT: movl %ebx, %eax
 ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll b/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll
new file mode 100644
index 0000000000000..3bae883a8d9de
--- /dev/null
+++ b/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=x86-64 -mattr=+mmx,+fma,+f16c,+avx512f -stop-after finalize-isel -o - %s | FileCheck %s
+; This test ensures that the MXCSR is implicitly used by MMX, F16C, FMA and AVX-512 FP instructions.
+
+define x86_mmx @mxcsr_mmx(<4 x float> %a0) {
+; CHECK: MMX_CVTPS2PIirr %{{[0-9]}}, implicit $mxcsr
+; CHECK: MMX_CVTPI2PSirr %{{[0-9]}}, killed %{{[0-9]}}, implicit $mxcsr
+; CHECK: MMX_CVTTPS2PIirr killed %{{[0-9]}}, implicit $mxcsr
+; CHECK: MMX_CVTPI2PDirr killed %{{[0-9]$}}
+; CHECK: MMX_CVTPD2PIirr killed %{{[0-9]}}, implicit $mxcsr
+ %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0)
+ %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %1)
+ %3 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %2)
+ %4 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %3)
+ %5 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %4)
+ ret x86_mmx %5
+}
+
+define half @mxcsr_f16c(float %a) {
+; CHECK: VCVTPS2PH{{.*}}mxcsr
+; CHECK: VCVTPH2PS{{.*}}mxcsr
+ %res = fptrunc float %a to half
+ ret half %res
+}
+
+define <4 x float> @mxcsr_fma_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK: VFMADD{{.*}}mxcsr
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float>
+%a)
+ ret <4 x float> %res
+}
+
+define <4 x float> @mxcsr_fma_ps(<4 x float> %a, <4 x float> %b) {
+; CHECK: VFMADD{{.*}}mxcsr
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float>
+%a)
+ ret <4 x float> %res
+}
+
+define <8 x double> @mxcsr_fma_sae(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
+; CHECK: VFMADD{{.*}}mxcsr
+ %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 -1, i32 10)
+ ret <8 x double> %res
+}
+
+declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>)
+declare<4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx)
+declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>)
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx)
+declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>)
+declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
+declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
diff --git a/llvm/test/CodeGen/X86/pr37916.ll b/llvm/test/CodeGen/X86/pr37916.ll
index 2da9413a9a0cf..484104da9ff47 100644
--- a/llvm/test/CodeGen/X86/pr37916.ll
+++ b/llvm/test/CodeGen/X86/pr37916.ll
@@ -7,7 +7,6 @@ define void @fn1() local_unnamed_addr {
 ; CHECK-LABEL: fn1:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: .p2align 4, 0x90
 ; CHECK-NEXT: .LBB0_1: # %if.end
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: movl a+4, %eax
diff --git a/llvm/test/CodeGen/X86/pr42905.ll b/llvm/test/CodeGen/X86/pr42905.ll
index bb51aced225c6..310a173f824e9 100644
--- a/llvm/test/CodeGen/X86/pr42905.ll
+++ b/llvm/test/CodeGen/X86/pr42905.ll
@@ -11,7 +11,7 @@ define <4 x double> @autogen_SD30452(i1 %L230) {
 ; CHECK-NEXT: movq %xmm2, %rax
 ; CHECK-NEXT: xorps %xmm2, %xmm2
 ; CHECK-NEXT: cvtsi2sd %rax, %xmm2
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
 ; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/pr44140.ll b/llvm/test/CodeGen/X86/pr44140.ll
index 9916252e6c499..941f45d2d99a2 100644
--- a/llvm/test/CodeGen/X86/pr44140.ll
+++ b/llvm/test/CodeGen/X86/pr44140.ll
@@ -10,7 +10,6 @@ define win64cc void @opaque() {
 ; We need xmm6 to be live from the loop header across all iterations of the loop.
 ; We shouldn't clobber ymm6 inside the loop.
-; FIXME: We currently clobber ymm6
 
 define i32 @main() {
 ; CHECK-LABEL: main:
 ; CHECK: # %bb.0: # %start
@@ -23,7 +22,7 @@ define i32 @main() {
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
-; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm6
+; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm7
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm3
 ; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
@@ -31,10 +30,10 @@ define i32 @main() {
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
 ; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm5
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index 9238ab0bf89f7..92708d33924f0 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -29,8 +29,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: .cfi_def_cfa_offset 48
 ; CHECK-NEXT: pushq %rbx
 ; CHECK-NEXT: .cfi_def_cfa_offset 56
-; CHECK-NEXT: subq $536, %rsp ## imm = 0x218
-; CHECK-NEXT: .cfi_def_cfa_offset 592
+; CHECK-NEXT: subq $552, %rsp ## imm = 0x228
+; CHECK-NEXT: .cfi_def_cfa_offset 608
 ; CHECK-NEXT: .cfi_offset %rbx, -56
 ; CHECK-NEXT: .cfi_offset %r12, -48
 ; CHECK-NEXT: .cfi_offset %r13, -40
@@ -54,7 +54,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: testb %al, %al
 ; CHECK-NEXT: je LBB0_55
 ; CHECK-NEXT: LBB0_4: ## %cleanup
-; CHECK-NEXT: addq $536, %rsp ## imm = 0x218
+; CHECK-NEXT: addq $552, %rsp ## imm = 0x228
 ; CHECK-NEXT: popq %rbx
 ; CHECK-NEXT: popq %r12
 ; CHECK-NEXT: popq %r13
@@ -68,7 +68,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: je LBB0_55
 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720
 ; CHECK-NEXT: movq %rdx, %rbx
-; CHECK-NEXT: movq %rdi, %rbp
+; CHECK-NEXT: movq %rdi, %r14
 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT: cmpq %rax, %rcx
@@ -78,10 +78,10 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: movl $32, %esi
 ; CHECK-NEXT: callq _memset
 ; CHECK-NEXT: LBB0_8: ## %while.body.preheader
-; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410
 ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx
-; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx
+; CHECK-NEXT: leaq 8(%rcx,%rax), %rax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT: movl $1, %r15d
 ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax
 ; CHECK-NEXT: movb $1, %cl
@@ -92,69 +92,70 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: testb %cl, %cl
 ; CHECK-NEXT: jne LBB0_9
 ; CHECK-NEXT: ## %bb.10: ## %do.end
-; CHECK-NEXT: xorl %r14d, %r14d
-; CHECK-NEXT: testb %r14b, %r14b
+; CHECK-NEXT: xorl %ebp, %ebp
+; CHECK-NEXT: testb %bpl, %bpl
 ; CHECK-NEXT: jne LBB0_11
 ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: leaq {{.*}}(%rip), %rsi
-; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
-; CHECK-NEXT: xorl %ebp, %ebp
-; CHECK-NEXT: xorl %r13d, %r13d
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: leaq {{.*}}(%rip), %r13
+; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT: xorl %r12d, %r12d
+; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT: jmp LBB0_13
 ; CHECK-NEXT: .p2align 4, 0x90
 ; CHECK-NEXT: LBB0_20: ## %sw.bb256
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl %r14d, %r13d
+; CHECK-NEXT: movl %ebp, %r12d
 ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT: decl %r15d
 ; CHECK-NEXT: testl %r15d, %r15d
-; CHECK-NEXT: movl %r13d, %r14d
+; CHECK-NEXT: movl %r12d, %ebp
 ; CHECK-NEXT: jle LBB0_22
 ; CHECK-NEXT: LBB0_13: ## %while.body200
 ; CHECK-NEXT: ## =>This Loop Header: Depth=1
 ; CHECK-NEXT: ## Child Loop BB0_30 Depth 2
 ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2
-; CHECK-NEXT: leal -268(%r14), %eax
+; CHECK-NEXT: leal -268(%rbp), %eax
 ; CHECK-NEXT: cmpl $105, %eax
 ; CHECK-NEXT: ja LBB0_14
 ; CHECK-NEXT: ## %bb.56: ## %while.body200
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movslq (%rdi,%rax,4), %rax
-; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: movslq (%r13,%rax,4), %rax
+; CHECK-NEXT: addq %r13, %rax
 ; CHECK-NEXT: jmpq *%rax
 ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: movl %r14d, %r13d
+; CHECK-NEXT: testb %bl, %bl
+; CHECK-NEXT: movl %ebp, %r12d
 ; CHECK-NEXT: jne LBB0_21
 ; CHECK-NEXT: jmp LBB0_55
 ; CHECK-NEXT: .p2align 4, 0x90
 ; CHECK-NEXT: LBB0_14: ## %while.body200
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: leal 1(%r14), %eax
+; CHECK-NEXT: leal 1(%rbp), %eax
 ; CHECK-NEXT: cmpl $21, %eax
 ; CHECK-NEXT: ja LBB0_20
 ; CHECK-NEXT: ## %bb.15: ## %while.body200
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $-1, %r13d
-; CHECK-NEXT: movslq (%rsi,%rax,4), %rax
-; CHECK-NEXT: addq %rsi, %rax
+; CHECK-NEXT: movl $-1, %r12d
+; CHECK-NEXT: leaq {{.*}}(%rip), %rcx
+; CHECK-NEXT: movslq (%rcx,%rax,4), %rax
+; CHECK-NEXT: addq %rcx, %rax
 ; CHECK-NEXT: jmpq *%rax
 ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $1, %r13d
+; CHECK-NEXT: movl $1, %r12d
 ; CHECK-NEXT: jmp LBB0_21
 ; CHECK-NEXT: LBB0_26: ## %sw.bb474
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: ## implicit-def: $r12
+; CHECK-NEXT: testb %bl, %bl
+; CHECK-NEXT: ## implicit-def: $r14
 ; CHECK-NEXT: jne LBB0_34
 ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: ## implicit-def: $r12
+; CHECK-NEXT: testb %bl, %bl
+; CHECK-NEXT: ## implicit-def: $r14
 ; CHECK-NEXT: jne LBB0_34
 ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
@@ -165,8 +166,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: .p2align 4, 0x90
 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge
 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
-; CHECK-NEXT: leaq 1(%r12), %rax
-; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: leaq 1(%r14), %rax
+; CHECK-NEXT: testb %bl, %bl
 ; CHECK-NEXT: je LBB0_33
 ; CHECK-NEXT: ## %bb.29: ## %land.rhs485
 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
@@ -175,15 +176,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780
 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1
 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
-; CHECK-NEXT: movq %rax, %r12
-; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: movq %rax, %r14
+; CHECK-NEXT: testb %bl, %bl
 ; CHECK-NEXT: jne LBB0_32
 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500
 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100
 ; CHECK-NEXT: callq ___maskrune
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: testb %bl, %bl
 ; CHECK-NEXT: jne LBB0_32
 ; CHECK-NEXT: jmp LBB0_34
 ; CHECK-NEXT: LBB0_45: ## %sw.bb1134
@@ -193,23 +193,23 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: cmpq %rax, %rcx
 ; CHECK-NEXT: jb LBB0_55
 ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: xorl %ebp, %ebp
-; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C
+; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT: movl $268, %r12d ## imm = 0x10C
 ; CHECK-NEXT: jmp LBB0_21
-; CHECK-NEXT: LBB0_19: ## %sw.bb243
+; CHECK-NEXT: LBB0_40: ## %sw.bb566
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $2, %r13d
+; CHECK-NEXT: movl $20, %r12d
 ; CHECK-NEXT: jmp LBB0_21
-; CHECK-NEXT: LBB0_40: ## %sw.bb566
+; CHECK-NEXT: LBB0_19: ## %sw.bb243
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $20, %r13d
+; CHECK-NEXT: movl $2, %r12d
 ; CHECK-NEXT: jmp LBB0_21
 ; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: incq %r12
+; CHECK-NEXT: incq %r14
 ; CHECK-NEXT: LBB0_34: ## %if.end517
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: leal -324(%r13), %eax
+; CHECK-NEXT: leal -324(%r12), %eax
 ; CHECK-NEXT: cmpl $59, %eax
 ; CHECK-NEXT: ja LBB0_35
 ; CHECK-NEXT: ## %bb.57: ## %if.end517
@@ -219,11 +219,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: jb LBB0_38
 ; CHECK-NEXT: LBB0_35: ## %if.end517
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmpl $11, %r13d
+; CHECK-NEXT: cmpl $11, %r12d
 ; CHECK-NEXT: je LBB0_38
 ; CHECK-NEXT: ## %bb.36: ## %if.end517
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmpl $24, %r13d
+; CHECK-NEXT: cmpl $24, %r12d
 ; CHECK-NEXT: je LBB0_38
 ; CHECK-NEXT: ## %bb.37: ## %if.then532
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
@@ -233,15 +233,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: LBB0_38: ## %for.cond534
 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1
 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
-; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: testb %bl, %bl
 ; CHECK-NEXT: jne LBB0_38
 ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader
 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: movb $0, (%r12)
-; CHECK-NEXT: movl %r14d, %r13d
-; CHECK-NEXT: leaq {{.*}}(%rip), %rsi
-; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
+; CHECK-NEXT: testb %bl, %bl
+; CHECK-NEXT: movb $0, (%r14)
+; CHECK-NEXT: movl %ebp, %r12d
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
 ; CHECK-NEXT: jmp LBB0_21
 ; CHECK-NEXT: .p2align 4, 0x90
 ; CHECK-NEXT: LBB0_42: ## %while.cond864
@@ -256,30 +255,44 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: jmp LBB0_25
 ; CHECK-NEXT: LBB0_11:
-; CHECK-NEXT: xorl %ebp, %ebp
-; CHECK-NEXT: xorl %r13d, %r13d
+; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT: xorl %r12d, %r12d
 ; CHECK-NEXT: LBB0_22: ## %while.end1465
-; CHECK-NEXT: incl %r13d
-; CHECK-NEXT: cmpl $16, %r13d
+; CHECK-NEXT: incl %r12d
+; CHECK-NEXT: cmpl $16, %r12d
 ; CHECK-NEXT: ja LBB0_50
 ; CHECK-NEXT: ## %bb.23: ## %while.end1465
 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801
-; CHECK-NEXT: btl %r13d, %eax
+; CHECK-NEXT: btl %r12d, %eax
 ; CHECK-NEXT: jae LBB0_50
 ; CHECK-NEXT: ## %bb.24:
-; CHECK-NEXT: xorl %ebp, %ebp
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
+; CHECK-NEXT: xorl %ebx, %ebx
 ; CHECK-NEXT: LBB0_48: ## %if.then1477
 ; CHECK-NEXT: movl $1, %edx
 ; CHECK-NEXT: callq _write
-; CHECK-NEXT: subq %rbp, %rbx
+; CHECK-NEXT: subq %rbx, %r14
 ; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax
-; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax
+; CHECK-NEXT: leaq 8189(%r14,%rax), %rax
 ; CHECK-NEXT: .p2align 4, 0x90
 ; CHECK-NEXT: LBB0_49: ## %for.body1723
 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: decq %rax
 ; CHECK-NEXT: jmp LBB0_49
+; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit
+; CHECK-NEXT: movq %r14, %rbx
+; CHECK-NEXT: jmp LBB0_48
+; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je LBB0_41
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB0_17: ## %for.body643.us
+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jmp LBB0_17
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: LBB0_41: ## %while.cond661
+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jmp LBB0_41
 ; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader
 ; CHECK-NEXT: movl $512, %eax ## imm = 0x200
 ; CHECK-NEXT: cmpq %rax, %rax
@@ -289,14 +302,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: testb %al, %al
 ; CHECK-NEXT: jne LBB0_54
 ; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader
-; CHECK-NEXT: incl %ebp
-; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: incl {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
 ; CHECK-NEXT: LBB0_53: ## %while.body1679
 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq (%rbx), %rdi
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-NEXT: movq (%rax), %rdi
 ; CHECK-NEXT: callq _fileno
-; CHECK-NEXT: movslq %ebp, %rax
-; CHECK-NEXT: leal 1(%rax), %ebp
+; CHECK-NEXT: movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 4-byte Folded Reload
+; CHECK-NEXT: leal 1(%rax), %ecx
+; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
 ; CHECK-NEXT: cmpq %rax, %rax
 ; CHECK-NEXT: jl LBB0_53
 ; CHECK-NEXT: LBB0_54: ## %while.cond1683.preheader
@@ -304,22 +318,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT: testb %al, %al
 ; CHECK-NEXT: LBB0_55: ## %if.then.i
 ; CHECK-NEXT: ud2
-; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
-; CHECK-NEXT: movq %rbx, %rbp
-; CHECK-NEXT: jmp LBB0_48
-; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je LBB0_41
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: LBB0_17: ## %for.body643.us
-; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: jmp LBB0_17
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: LBB0_41: ## %while.cond661
-; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: jmp LBB0_41
 entry:
 %sub.ptr.rhs.cast646 = ptrtoint i8* %line to i64
 %old = alloca [512 x i8], align 16
diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
index 9f9d75cb36ca8..6b29bd2207afe 100644
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -296,8 +296,7 @@ define void @test_shl_i128(i128 %x, i128 %a, i128* nocapture %r) nounwind {
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: shll %cl, %ebx
 ; X86-NEXT: movl %ebp, %esi
@@ -534,8 +533,7 @@ define void @test_lshr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
 ; X86-NEXT: .LBB6_9: # %entry
 ; X86-NEXT: movl %edi, %esi
 ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT: shrl %cl, %ebp
 ; X86-NEXT: testb $32, %cl
@@ -795,9 +793,8 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
 ; X86-NEXT: # %bb.4: # %entry
 ; X86-NEXT: movl %edi, %ebx
 ; X86-NEXT: .LBB7_5: # %entry
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebp
-; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %edi
 ; X86-NEXT: movl %edx, %ecx
 ; X86-NEXT: sarl %cl, %edi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -835,8 +832,7 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
 ; X86-NEXT: movl %esi, %edi
 ; X86-NEXT: .LBB7_9: # %entry
 ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT: sarl %cl, %esi
 ; X86-NEXT: testb $32, %cl
@@ -850,8 +846,7 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT: movb $64, %cl
 ; X86-NEXT: subb %dl, %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT: shldl %cl, %ebx, %ebp
 ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1059,12 +1054,11 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: subl $72, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT: movl %ebx, %ecx
 ; X86-NEXT: shll %cl, %ebp
-; X86-NEXT: movl %eax, %esi
 ; X86-NEXT: shll %cl, %esi
 ; X86-NEXT: movl %edx, %eax
 ; X86-NEXT: subl $64, %eax
@@ -1130,9 +1124,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture
 ; X86-NEXT: movl %ecx, %ebp
 ; X86-NEXT: movl %edx, %ecx
 ; X86-NEXT: shll %cl, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT: shll %cl, %esi
 ; X86-NEXT: testb $32, %dl
 ; X86-NEXT: movl $0, %edi
@@ -1210,8 +1202,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture
 ; X86-NEXT: movl %edi, %ecx
 ; X86-NEXT: .LBB8_23: # %entry
 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: shll %cl, %edi
 ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
diff --git a/llvm/test/CodeGen/X86/sqrt-partial.ll b/llvm/test/CodeGen/X86/sqrt-partial.ll
index 7ed68c1084998..48914d8ed44e0 100644
--- a/llvm/test/CodeGen/X86/sqrt-partial.ll
+++ b/llvm/test/CodeGen/X86/sqrt-partial.ll
@@ -38,7 +38,7 @@ define float @f(float %val) nounwind {
 define double @d(double %val) nounwind {
 ; SSE-LABEL: d:
 ; SSE: # %bb.0:
-; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: xorpd %xmm1, %xmm1
 ; SSE-NEXT: ucomisd %xmm1, %xmm0
 ; SSE-NEXT: jb .LBB1_2
 ; SSE-NEXT: # %bb.1: # %.split
@@ -49,7 +49,7 @@ define double @d(double %val) nounwind {
 ;
 ; AVX-LABEL: d:
 ; AVX: # %bb.0:
-; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vucomisd %xmm1, %xmm0
 ; AVX-NEXT: jb .LBB1_2
 ; AVX-NEXT: # %bb.1: # %.split
diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
index 4f26db8869d92..7ce16bbc3d420 100644
--- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
@@ -98,8 +98,8 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; X86-NEXT: addl %esi, %ecx
 ; X86-NEXT: adcl $0, %ebp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: mull %edx
 ; X86-NEXT: movl %edx, %esi
 ; X86-NEXT: addl %ecx, %eax
 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
diff --git a/llvm/test/CodeGen/X86/undef-label.ll b/llvm/test/CodeGen/X86/undef-label.ll
index b4be383d55ddc..56e0ca907f8e1 100644
--- a/llvm/test/CodeGen/X86/undef-label.ll
+++ b/llvm/test/CodeGen/X86/undef-label.ll
@@ -11,7 +11,7 @@ define void @xyz() {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: movl $g, %eax
 ; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: xorpd %xmm1, %xmm1
 ; CHECK-NEXT: ucomisd %xmm1, %xmm0
 ; CHECK-NEXT: jne .LBB0_1
 ; CHECK-NEXT: jnp .LBB0_2
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
index fc3233327a558..bf2ea5e067cc1 100644
--- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -21,7 +21,7 @@ define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
 ; SSE: # %bb.0:
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movq %rax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movq %rax, %xmm0
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -125,13 +125,13 @@ define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
 ; SSE: # %bb.0:
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movq %rax, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movq %rax, %xmm0
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
 ; SSE-NEXT: cvttsd2si %xmm1, %rax
 ; SSE-NEXT: movq %rax, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: cvttsd2si %xmm1, %rax
 ; SSE-NEXT: movq %rax, %xmm0
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
@@ -335,7 +335,7 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
 ; SSE-LABEL: fptoui_2f64_to_4i32:
 ; SSE: # %bb.0:
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE-NEXT: movd %eax, %xmm0
 ; SSE-NEXT: movd %ecx, %xmm1
@@ -409,7 +409,7 @@ define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
 ; SSE: # %bb.0:
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movd %eax, %xmm0
 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
@@ -482,7 +482,7 @@ define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
 ; SSE: # %bb.0:
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movd %eax, %xmm0
 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
@@ -734,13 +734,13 @@ define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
 ; SSE: # %bb.0:
 ; SSE-NEXT: cvttsd2si %xmm1, %rax
 ; SSE-NEXT: movd %eax, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: cvttsd2si %xmm1, %rax
 ; SSE-NEXT: movd %eax, %xmm1
 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
 ; SSE-NEXT: movd %eax, %xmm0
 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 269879e7f1a31..1d0106b75a84f 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -27,8 +27,8 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
 ; SSE2-NEXT: movq %xmm0, %rax
 ; SSE2-NEXT: xorps %xmm0, %xmm0
 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0
-; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movapd %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: sitofp_2i64_to_2f64:
@@ -38,7 +38,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
 ; SSE41-NEXT: movq %xmm0, %rax
 ; SSE41-NEXT: xorps %xmm0, %xmm0
 ; SSE41-NEXT: cvtsi2sd %rax, %xmm0
-; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE41-NEXT: retq
 ;
 ; VEX-LABEL: sitofp_2i64_to_2f64:
@@ -47,7 +47,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
 ; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
 ; VEX-NEXT: vmovq %xmm0, %rax
 ; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; VEX-NEXT: retq
 ;
 ; AVX512F-LABEL: sitofp_2i64_to_2f64:
@@ -56,7 +56,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
 ; AVX512F-NEXT: vmovq %xmm0, %rax
 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: sitofp_2i64_to_2f64:
@@ -65,7 +65,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
 ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
@@ -237,16 +237,16 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; SSE2-NEXT: movq %xmm0, %rax
 ; SSE2-NEXT: xorps %xmm0, %xmm0
 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0
-; SSE2-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
 ; SSE2-NEXT: movq %xmm1, %rax
 ; SSE2-NEXT: cvtsi2sd %rax, %xmm3
 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT: movq %xmm0, %rax
 ; SSE2-NEXT: xorps %xmm0, %xmm0
 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0
-; SSE2-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: movaps %xmm3, %xmm1
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: movapd %xmm3, %xmm1
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: sitofp_4i64_to_4f64:
@@ -256,14 +256,14 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; SSE41-NEXT: movq %xmm0, %rax
 ; SSE41-NEXT: xorps %xmm0, %xmm0
 ; SSE41-NEXT: cvtsi2sd %rax, %xmm0
-; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE41-NEXT: pextrq $1, %xmm1, %rax
 ; SSE41-NEXT: xorps %xmm2, %xmm2
 ; SSE41-NEXT: cvtsi2sd %rax, %xmm2
 ; SSE41-NEXT: movq %xmm1, %rax
 ; SSE41-NEXT: xorps %xmm1, %xmm1
 ; SSE41-NEXT: cvtsi2sd %rax, %xmm1
-; SSE41-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE41-NEXT: retq
 ;
 ; AVX1-LABEL: sitofp_4i64_to_4f64:
@@ -273,12 +273,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
 ; AVX1-NEXT: vmovq %xmm1, %rax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
@@ -289,12 +289,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; AVX2-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
 ; AVX2-NEXT: vmovq %xmm1, %rax
 ; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
 ; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
 ;
@@ -305,12 +305,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
 ; AVX512F-NEXT: vmovq %xmm1, %rax
 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX512F-NEXT: vmovq %xmm0, %rax
 ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512F-NEXT:
retq ; @@ -321,12 +321,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vmovq %xmm1, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; @@ -1204,7 +1204,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -1235,7 +1235,7 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) { ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE2-NEXT: retq ; @@ -1274,7 +1274,7 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -1304,7 +1304,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero ; SSE2-NEXT: retq ; @@ -1342,7 +1342,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -1927,7 +1927,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -2074,7 +2074,7 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; AVX512VL-NEXT: vunpcklps 
{{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -2216,7 +2216,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -3023,7 +3023,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2sd %rax, %xmm1 -; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_load_2i64_to_2f64: @@ -3034,7 +3034,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2sd %rax, %xmm0 -; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE41-NEXT: retq ; ; VEX-LABEL: sitofp_load_2i64_to_2f64: @@ -3044,7 +3044,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_load_2i64_to_2f64: @@ -3054,7 +3054,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_load_2i64_to_2f64: @@ -3064,7 +3064,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64: @@ -3220,7 +3220,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2sd %rax, %xmm1 -; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: movq %xmm2, %rax ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2sd %rax, %xmm1 @@ -3228,7 +3228,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; SSE2-NEXT: movq %xmm2, %rax ; SSE2-NEXT: xorps %xmm2, %xmm2 ; SSE2-NEXT: cvtsi2sd %rax, %xmm2 -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_load_4i64_to_4f64: @@ -3240,64 +3240,64 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2sd %rax, %xmm0 -; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE41-NEXT: pextrq $1, %xmm1, %rax ; SSE41-NEXT: xorps %xmm2, %xmm2 ; SSE41-NEXT: 
cvtsi2sd %rax, %xmm2 ; SSE41-NEXT: movq %xmm1, %rax ; SSE41-NEXT: xorps %xmm1, %xmm1 ; SSE41-NEXT: cvtsi2sd %rax, %xmm1 -; SSE41-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE41-NEXT: retq ; ; VEX-LABEL: sitofp_load_4i64_to_4f64: ; VEX: # %bb.0: -; VEX-NEXT: vmovdqa (%rdi), %xmm0 +; VEX-NEXT: vmovapd (%rdi), %xmm0 ; VEX-NEXT: vmovdqa 16(%rdi), %xmm1 ; VEX-NEXT: vpextrq $1, %xmm1, %rax ; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; VEX-NEXT: vmovq %xmm1, %rax ; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; VEX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; VEX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; VEX-NEXT: vpextrq $1, %xmm0, %rax ; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_load_4i64_to_4f64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512F-NEXT: vmovapd (%rdi), %xmm0 ; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; AVX512F-NEXT: vmovq %xmm1, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_load_4i64_to_4f64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512VL-NEXT: vmovapd (%rdi), %xmm0 ; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vmovq %xmm1, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; @@ -4288,7 +4288,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; ; VEX-LABEL: sitofp_load_8i64_to_8f32: ; VEX: # %bb.0: -; VEX-NEXT: vmovdqa (%rdi), %xmm0 +; VEX-NEXT: vmovaps (%rdi), %xmm0 ; VEX-NEXT: vmovdqa 16(%rdi), %xmm1 ; VEX-NEXT: vmovdqa 32(%rdi), %xmm2 ; VEX-NEXT: vmovdqa 48(%rdi), %xmm3 @@ -4319,7 +4319,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; ; AVX512F-LABEL: sitofp_load_8i64_to_8f32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512F-NEXT: vmovaps (%rdi), %xmm0 ; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2 ; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3 @@ -4350,7 +4350,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; ; AVX512VL-LABEL: sitofp_load_8i64_to_8f32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512VL-NEXT: vmovaps (%rdi), %xmm0 ; 
AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2 ; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm3 @@ -4648,7 +4648,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; VEX-LABEL: uitofp_load_4i64_to_4f32: ; VEX: # %bb.0: ; VEX-NEXT: vmovdqa (%rdi), %xmm2 -; VEX-NEXT: vmovdqa 16(%rdi), %xmm0 +; VEX-NEXT: vmovaps 16(%rdi), %xmm0 ; VEX-NEXT: vpextrq $1, %xmm2, %rax ; VEX-NEXT: testq %rax, %rax ; VEX-NEXT: js .LBB81_1 @@ -5167,7 +5167,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; VEX-LABEL: uitofp_load_8i64_to_8f32: ; VEX: # %bb.0: ; VEX-NEXT: vmovdqa (%rdi), %xmm1 -; VEX-NEXT: vmovdqa 16(%rdi), %xmm0 +; VEX-NEXT: vmovaps 16(%rdi), %xmm0 ; VEX-NEXT: vmovdqa 32(%rdi), %xmm4 ; VEX-NEXT: vmovdqa 48(%rdi), %xmm3 ; VEX-NEXT: vpextrq $1, %xmm4, %rax @@ -5293,7 +5293,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; ; AVX512F-LABEL: uitofp_load_8i64_to_8f32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512F-NEXT: vmovaps (%rdi), %xmm0 ; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2 ; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3 @@ -5324,7 +5324,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; ; AVX512VL-LABEL: uitofp_load_8i64_to_8f32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512VL-NEXT: vmovaps (%rdi), %xmm0 ; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2 ; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm3 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index b5dff70e234e4..8cf8cab8b79b1 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -2914,56 +2914,40 @@ define <8 x i16> @shuffle_extract_insert_double(<8 x i16> %a, <8 x i16> %b) { define <8 x i16> @shuffle_extract_concat_insert(<4 x i16> %lhsa, <4 x i16> %rhsa, <8 x i16> %b) { ; SSE2-LABEL: shuffle_extract_concat_insert: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: pextrw $2, %xmm1, %ecx -; SSE2-NEXT: pextrw $5, %xmm2, %edx -; SSE2-NEXT: pextrw $7, %xmm2, %esi -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,0,3,4,5,6,7] -; SSE2-NEXT: pinsrw $4, %ecx, %xmm0 -; SSE2-NEXT: pinsrw $5, %edx, %xmm0 -; SSE2-NEXT: pinsrw $6, %eax, %xmm0 -; SSE2-NEXT: pinsrw $7, %esi, %xmm0 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,3,2,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: shuffle_extract_concat_insert: ; SSSE3: # %bb.0: -; SSSE3-NEXT: pextrw $2, %xmm1, %eax -; SSSE3-NEXT: pextrw $5, %xmm2, %ecx -; SSSE3-NEXT: pextrw $7, %xmm2, %edx -; SSSE3-NEXT: movd %xmm1, %esi +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,8,9,12,13,12,13,14,15] +; 
SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,1,6,7,10,11,14,15,14,15,10,11,12,13,14,15] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,2,3,0,1,14,15,8,9,14,15,12,13,14,15] -; SSSE3-NEXT: pinsrw $4, %eax, %xmm0 -; SSSE3-NEXT: pinsrw $5, %ecx, %xmm0 -; SSSE3-NEXT: pinsrw $6, %esi, %xmm0 -; SSSE3-NEXT: pinsrw $7, %edx, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_extract_concat_insert: ; SSE41: # %bb.0: -; SSE41-NEXT: movd %xmm1, %eax -; SSE41-NEXT: pextrw $2, %xmm1, %ecx +; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,8,9,12,13,12,13,14,15] +; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,1,6,7,10,11,14,15,14,15,10,11,12,13,14,15] ; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,2,3,0,1,14,15,8,9,14,15,12,13,14,15] -; SSE41-NEXT: pinsrw $4, %ecx, %xmm0 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm2[5],xmm0[6,7] -; SSE41-NEXT: pinsrw $6, %eax, %xmm0 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_extract_concat_insert: ; AVX: # %bb.0: -; AVX-NEXT: vmovd %xmm1, %eax -; AVX-NEXT: vpextrw $2, %xmm1, %ecx -; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,2,3,0,1,14,15,8,9,14,15,12,13,14,15] -; AVX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 -; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm2[5],xmm0[6,7] -; AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,8,9,12,13,12,13,14,15] +; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm2[0,1,6,7,10,11,14,15,14,15,10,11,12,13,14,15] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; AVX-NEXT: retq %a = shufflevector <4 x i16> %lhsa, <4 x i16> %rhsa, <8 x i32> %a0 = extractelement <8 x i16> %a, i32 0 diff --git a/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll new file mode 100644 index 0000000000000..9555ce032db90 --- /dev/null +++ b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-windows-gnu | FileCheck %s + +define void @foo() unnamed_addr #0 { +start: + %b = alloca i64, align 8 + %c = alloca [4294967295 x i8], align 1 + ret void +} + +attributes #0 = { nonlazybind uwtable "probe-stack"="probe_stack" "target-cpu"="x86-64" } + +; CHECK-LABEL: foo: +; CHECK: movabsq $4294967304, %rax +; CHECK-NEXT: callq probe_stack diff --git a/llvm/test/DebugInfo/Inputs/.build-id/ab/b50d82b6bdc861.debug b/llvm/test/DebugInfo/Inputs/.build-id/ab/b50d82b6bdc861.debug new file mode 100755 index 0000000000000..5eafa358a9c69 Binary files /dev/null and b/llvm/test/DebugInfo/Inputs/.build-id/ab/b50d82b6bdc861.debug differ diff --git a/llvm/test/DebugInfo/Inputs/dwarfdump-macro.dwo b/llvm/test/DebugInfo/Inputs/dwarfdump-macro.dwo new file mode 100644 index 0000000000000..5b0c16b745c40 Binary files /dev/null and b/llvm/test/DebugInfo/Inputs/dwarfdump-macro.dwo differ diff --git a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir index 
d2745874d5ef9..9001c8ba8eea2 100644 --- a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir +++ b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir @@ -129,6 +129,19 @@ body: | --- name: callee tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r11', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r10', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } body: | bb.0: successors: %bb.2(0x30000000), %bb.1(0x50000000) diff --git a/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir new file mode 100644 index 0000000000000..8ae628af2c099 --- /dev/null +++ b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir @@ -0,0 +1,187 @@ +# RUN: llc -mtriple hexagon -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s + +# Based on the following C reproducer: +# +# int ga, gb, gc; +# +# extern void callee(int, int, int); +# +# void caller() { +# int a = ga; +# int b = gb; +# int c = gc; +# +# // Clobber all integer registers. 
+# __asm("" : : : +# "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", +# "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", +# "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28"); +# +# callee(a, b, c); +# } + +--- | + target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" + target triple = "hexagon" + + @ga = common global i32 0, align 4 + @gb = common global i32 0, align 4 + @gc = common global i32 0, align 4 + + ; Function Attrs: nounwind + define void @caller() #0 !dbg !12 { + entry: + %0 = load i32, i32* @ga, align 4, !dbg !15 + %1 = load i32, i32* @gb, align 4, !dbg !16 + %2 = load i32, i32* @gc, align 4, !dbg !17 + call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28}"(), !dbg !18, !srcloc !19 + call void @callee(i32 %0, i32 %1, i32 %2), !dbg !20 + ret void, !dbg !21 + } + + declare !dbg !4 void @callee(i32, i32, i32) + + attributes #0 = { nounwind } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!8, !9, !10} + !llvm.ident = !{!11} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, globals: !2, nameTableKind: None) + !1 = !DIFile(filename: "h.c", directory: "/") + !2 = !{} + !3 = !{!4} + !4 = !DISubprogram(name: "callee", scope: !1, file: !1, line: 3, type: !5, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2) + !5 = !DISubroutineType(types: !6) + !6 = !{null, !7, !7, !7} + !7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !8 = !{i32 7, !"Dwarf Version", i32 4} + !9 = !{i32 2, !"Debug Info Version", i32 3} + !10 = !{i32 1, !"wchar_size", i32 4} + !11 = !{!"clang version 10.0.0"} + !12 = distinct !DISubprogram(name: "caller", scope: !1, file: !1, line: 5, type: !13, scopeLine: 5, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) + !13 = !DISubroutineType(types: !14) + !14 = !{null} + !15 = !DILocation(line: 6, scope: !12) + !16 = !DILocation(line: 7, scope: !12) + !17 = !DILocation(line: 8, scope: !12) + !18 = !DILocation(line: 11, scope: !12) + !19 = !{i32 158} + !20 = !DILocation(line: 16, scope: !12) + !21 = !DILocation(line: 17, scope: !12) + +... 
+--- +name: caller +tracksRegLiveness: true +frameInfo: + stackSize: 64 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +fixedStack: + - { id: 0, type: spill-slot, offset: -48, size: 8, alignment: 8, callee-saved-register: '$d13' } + - { id: 1, type: spill-slot, offset: -40, size: 8, alignment: 8, callee-saved-register: '$d12' } + - { id: 2, type: spill-slot, offset: -32, size: 8, alignment: 8, callee-saved-register: '$d11' } + - { id: 3, type: spill-slot, offset: -24, size: 8, alignment: 8, callee-saved-register: '$d10' } + - { id: 4, type: spill-slot, offset: -16, size: 8, alignment: 8, callee-saved-register: '$d9' } + - { id: 5, type: spill-slot, offset: -8, size: 8, alignment: 8, callee-saved-register: '$d8' } +stack: + - { id: 0, type: spill-slot, offset: -52, size: 4, alignment: 4 } + - { id: 1, type: spill-slot, offset: -56, size: 4, alignment: 4 } + - { id: 2, type: spill-slot, offset: -60, size: 4, alignment: 4 } +callSites: + - { bb: 0, offset: 40, fwdArgRegs: + - { arg: 0, reg: '$r0' } + - { arg: 1, reg: '$r1' } + - { arg: 2, reg: '$r2' } } +body: | + bb.0.entry: + liveins: $d8, $d9, $d10, $d11, $d12, $d13, $d8, $d9, $d10, $d11, $d12, $d13 + + BUNDLE implicit-def $r29, implicit-def $r30, implicit $r29, implicit killed $framekey, implicit killed $framelimit, implicit killed $r30, implicit killed $r31, implicit killed $d8, debug-location !15 { + $r29 = S2_allocframe $r29, 64, implicit-def $r30, implicit killed $framekey, implicit killed $framelimit, implicit killed $r30, implicit killed $r31, debug-location !15 :: (store 4 into stack) + S2_storerd_io internal $r29, -16, killed $d8, debug-location !15 :: (store 8 into %fixed-stack.5) + } + CFI_INSTRUCTION def_cfa $r30, 8 + CFI_INSTRUCTION offset $r31, -4 + CFI_INSTRUCTION offset $r30, -8 + CFI_INSTRUCTION offset $r17, -12 + CFI_INSTRUCTION offset $r16, -16 + CFI_INSTRUCTION offset $r19, -20 + CFI_INSTRUCTION offset $r18, -24 + CFI_INSTRUCTION offset $r21, -28 + CFI_INSTRUCTION offset $r20, -32 + CFI_INSTRUCTION offset $r23, -36 + CFI_INSTRUCTION offset $r22, -40 + CFI_INSTRUCTION offset $r25, -44 + CFI_INSTRUCTION offset $r24, -48 + CFI_INSTRUCTION offset $r27, -52 + CFI_INSTRUCTION offset $r26, -56 + BUNDLE implicit $r29, implicit killed $d9, implicit killed $d10, debug-location !15 { + S2_storerd_io $r29, 48, killed $d9, debug-location !15 :: (store 8 into %fixed-stack.4) + S2_storerd_io $r29, 40, killed $d10, debug-location !15 :: (store 8 into %fixed-stack.3) + } + BUNDLE implicit $r29, implicit killed $d11, implicit killed $d12, debug-location !15 { + S2_storerd_io $r29, 32, killed $d11, debug-location !15 :: (store 8 into %fixed-stack.2) + S2_storerd_io $r29, 24, killed $d12, debug-location !15 :: (store 8 into %fixed-stack.1) + } + BUNDLE implicit-def $r0, implicit $r29, implicit killed $d13, implicit $gp, debug-location !15 { + S2_storerd_io $r29, 16, killed $d13, debug-location !15 :: (store 8 into %fixed-stack.0) + renamable $r0 = L2_loadrigp @ga, implicit $gp, debug-location !15 :: (dereferenceable load 4 from @ga) + } + BUNDLE implicit-def $r0, implicit $r29, implicit killed $r0, implicit $gp, debug-location !16 { + S2_storeri_io $r29, 12, killed renamable $r0, debug-location !16 :: (store 4 into %stack.0) + renamable $r0 = L2_loadrigp @gb, implicit $gp, debug-location !16 :: (dereferenceable load 4 from @gb) + } + BUNDLE implicit-def $r0, implicit $r29, implicit killed $r0, implicit killed $gp, debug-location !17 { + S2_storeri_io $r29, 8, killed renamable $r0, debug-location !17 :: (store 
4 into %stack.1) + renamable $r0 = L2_loadrigp @gc, implicit killed $gp, debug-location !17 :: (dereferenceable load 4 from @gc) + } + S2_storeri_io $r29, 4, killed renamable $r0, debug-location !18 :: (store 4 into %stack.2) + INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0, 12, implicit-def dead early-clobber $r1, 12, implicit-def dead early-clobber $r2, 12, implicit-def dead early-clobber $r3, 12, implicit-def dead early-clobber $r4, 12, implicit-def dead early-clobber $r5, 12, implicit-def dead early-clobber $r6, 12, implicit-def dead early-clobber $r7, 12, implicit-def dead early-clobber $r8, 12, implicit-def dead early-clobber $r9, 12, implicit-def dead early-clobber $r10, 12, implicit-def dead early-clobber $r11, 12, implicit-def dead early-clobber $r12, 12, implicit-def dead early-clobber $r13, 12, implicit-def dead early-clobber $r14, 12, implicit-def dead early-clobber $r15, 12, implicit-def dead early-clobber $r16, 12, implicit-def dead early-clobber $r17, 12, implicit-def dead early-clobber $r18, 12, implicit-def dead early-clobber $r19, 12, implicit-def dead early-clobber $r20, 12, implicit-def dead early-clobber $r21, 12, implicit-def dead early-clobber $r22, 12, implicit-def dead early-clobber $r23, 12, implicit-def dead early-clobber $r24, 12, implicit-def dead early-clobber $r25, 12, implicit-def dead early-clobber $r26, 12, implicit-def dead early-clobber $r27, 12, implicit-def dead early-clobber $r28, !19, debug-location !18 + BUNDLE implicit-def $r0, implicit-def $r1, implicit $r29, debug-location !20 { + $r0 = L2_loadri_io $r29, 12, debug-location !20 :: (load 4 from %stack.0) + $r1 = L2_loadri_io $r29, 8, debug-location !20 :: (load 4 from %stack.1) + } + BUNDLE implicit-def dead $r2, implicit-def dead $pc, implicit-def dead $r31, implicit-def $r29, implicit $r29, implicit killed $r0, implicit killed $r1, debug-location !20 { + $r2 = L2_loadri_io $r29, 4, debug-location !20 :: (load 4 from %stack.2) + J2_call @callee, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit killed $r0, implicit killed $r1, implicit internal killed $r2, implicit-def $r29, debug-location !20 + } + BUNDLE implicit-def $d8, implicit-def $r16, implicit-def $r17, implicit-def $d9, implicit-def $r18, implicit-def $r19, implicit $r29, debug-location !21 { + $d8 = L2_loadrd_io $r29, 56, debug-location !21 :: (load 8 from %fixed-stack.5) + $d9 = L2_loadrd_io $r29, 48, debug-location !21 :: (load 8 from %fixed-stack.4) + } + BUNDLE implicit-def $d10, implicit-def $r20, implicit-def $r21, implicit-def $d11, implicit-def $r22, implicit-def $r23, implicit $r29, debug-location !21 { + $d10 = L2_loadrd_io $r29, 40, debug-location !21 :: (load 8 from %fixed-stack.3) + $d11 = L2_loadrd_io $r29, 32, debug-location !21 :: (load 8 from %fixed-stack.2) + } + BUNDLE implicit-def $d12, implicit-def $r24, implicit-def $r25, implicit-def $d13, implicit-def $r26, implicit-def $r27, implicit killed $r29, debug-location !21 { + $d12 = L2_loadrd_io $r29, 24, debug-location !21 :: (load 8 from %fixed-stack.1) + $d13 = L2_loadrd_io killed $r29, 16, debug-location !21 :: (load 8 from %fixed-stack.0) + } + $d15 = L4_return killed $r30, implicit-def $pc, implicit-def $r29, implicit killed $framekey, implicit-def dead $pc, implicit $d8, implicit $d9, implicit $d10, implicit $d11, implicit $d12, implicit $d13, debug-location !21 + +... + +# Verify that call site entries are emitted for all three parameters. 
+# Previously, the code that looks for instructions with which to describe +# parameters would stop when reaching the bundle header for the bundled call, +# resulting in $r0 and $r1 not being described. +# +# Please note that, at the time this test was created, the Hexagon target did +# not support call site information, so the "callSites" array has been manually +# added. + +# CHECK: DW_TAG_GNU_call_site_parameter +# CHECK-NEXT: DW_AT_location (DW_OP_reg2 R2) +# CHECK-NEXT: DW_AT_GNU_call_site_value (DW_OP_breg29 R29+4, DW_OP_deref_size 0x4) + +# CHECK: DW_TAG_GNU_call_site_parameter +# CHECK-NEXT: DW_AT_location (DW_OP_reg1 R1) +# CHECK-NEXT: DW_AT_GNU_call_site_value (DW_OP_breg29 R29+8, DW_OP_deref_size 0x4) + +# CHECK: DW_TAG_GNU_call_site_parameter +# CHECK-NEXT: DW_AT_location (DW_OP_reg0 R0) +# CHECK-NEXT: DW_AT_GNU_call_site_value (DW_OP_breg29 R29+12, DW_OP_deref_size 0x4) diff --git a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir new file mode 100644 index 0000000000000..8d121c3a30b91 --- /dev/null +++ b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir @@ -0,0 +1,121 @@ +# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s +# +#extern void fn1 (int, int, int); +# +#__attribute__((noinline)) +#int +#fn2 (int a, int b, int c) { +# int q = 2 + a; +# +# fn1 (5, 6, q); +# +# b = b + 7; +# if (b < 17) +# return 1; +# else +# return 0; +#} +# +# CHECK: ![[ARG_A:.*]] = !DILocalVariable(name: "a" +# CHECK: ![[ARG_B:.*]] = !DILocalVariable(name: "b" +# CHECK: ![[ARG_C:.*]] = !DILocalVariable(name: "c" +# CHECK: DBG_VALUE $edi, $noreg, ![[ARG_A]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# CHECK: DBG_VALUE $edx, $noreg, ![[ARG_C]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# CHECK: DBG_VALUE $edi, $noreg, ![[ARG_A]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# CHECK-NOT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# +--- | + ; ModuleID = 'test.c' + source_filename = "test.c" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + ; Function Attrs: noinline nounwind uwtable + define dso_local i32 @fn2(i32 %a, i32 %b, i32 %c) local_unnamed_addr !dbg !12 { + entry: + call void @llvm.dbg.value(metadata i32 %a, metadata !16, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.value(metadata i32 %b, metadata !17, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.value(metadata i32 %c, metadata !18, metadata !DIExpression()), !dbg !20 + %add = add nsw i32 %a, 2, !dbg !21 + call void @llvm.dbg.value(metadata i32 %add, metadata !19, metadata !DIExpression()), !dbg !20 + tail call void @fn1(i32 5, i32 6, i32 %add), !dbg !22 + call void @llvm.dbg.value(metadata i32 %b, metadata !17, metadata !DIExpression(DW_OP_plus_uconst, 7, DW_OP_stack_value)), !dbg !20 + %cmp = icmp slt i32 %b, 10, !dbg !23 + %. 
= zext i1 %cmp to i32, !dbg !25 + ret i32 %., !dbg !26 + } + + declare !dbg !4 dso_local void @fn1(i32, i32, i32) local_unnamed_addr + + ; Function Attrs: nounwind readnone speculatable willreturn + declare void @llvm.dbg.value(metadata, metadata, metadata) + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!8, !9, !10} + !llvm.ident = !{!11} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None) + !1 = !DIFile(filename: "test.c", directory: "/dir") + !2 = !{} + !3 = !{!4} + !4 = !DISubprogram(name: "fn1", scope: !1, file: !1, line: 1, type: !5, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2) + !5 = !DISubroutineType(types: !6) + !6 = !{null, !7, !7, !7} + !7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !8 = !{i32 2, !"Dwarf Version", i32 4} + !9 = !{i32 2, !"Debug Info Version", i32 3} + !10 = !{i32 1, !"wchar_size", i32 4} + !11 = !{!"clang version 10.0.0"} + !12 = distinct !DISubprogram(name: "fn2", scope: !1, file: !1, line: 5, type: !13, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15) + !13 = !DISubroutineType(types: !14) + !14 = !{!7, !7, !7, !7} + !15 = !{!16, !17, !18, !19} + !16 = !DILocalVariable(name: "a", arg: 1, scope: !12, file: !1, line: 5, type: !7) + !17 = !DILocalVariable(name: "b", arg: 2, scope: !12, file: !1, line: 5, type: !7) + !18 = !DILocalVariable(name: "c", arg: 3, scope: !12, file: !1, line: 5, type: !7) + !19 = !DILocalVariable(name: "q", scope: !12, file: !1, line: 7, type: !7) + !20 = !DILocation(line: 0, scope: !12) + !21 = !DILocation(line: 7, column: 15, scope: !12) + !22 = !DILocation(line: 9, column: 5, scope: !12) + !23 = !DILocation(line: 12, column: 11, scope: !24) + !24 = distinct !DILexicalBlock(scope: !12, file: !1, line: 12, column: 9) + !25 = !DILocation(line: 0, scope: !24) + !26 = !DILocation(line: 16, column: 1, scope: !12) + +... 
+--- +name: fn2 +alignment: 16 +callSites: + - { bb: 0, offset: 14, fwdArgRegs: + - { arg: 0, reg: '$edi' } + - { arg: 1, reg: '$esi' } + - { arg: 2, reg: '$edx' } } +body: | + bb.0.entry: + liveins: $edi, $esi, $rbx + + DBG_VALUE $edi, $noreg, !16, !DIExpression(), debug-location !20 + DBG_VALUE $esi, $noreg, !17, !DIExpression(), debug-location !20 + DBG_VALUE $edx, $noreg, !18, !DIExpression(), debug-location !20 + frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 16 + CFI_INSTRUCTION offset $rbx, -16 + $ebx = MOV32rr $esi + DBG_VALUE $ebx, $noreg, !17, !DIExpression(), debug-location !20 + renamable $edi = KILL $edi, implicit-def $rdi + DBG_VALUE $edi, $noreg, !16, !DIExpression(), debug-location !20 + renamable $edx = LEA64_32r killed renamable $rdi, 1, $noreg, 2, $noreg, debug-location !21 + DBG_VALUE $edx, $noreg, !19, !DIExpression(), debug-location !20 + $edi = MOV32ri 5, debug-location !22 + $esi = MOV32ri 6, debug-location !22 + CALL64pcrel32 @fn1, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit-def $rsp, implicit-def $ssp, debug-location !22 + DBG_VALUE $ebx, $noreg, !17, !DIExpression(DW_OP_plus_uconst, 7, DW_OP_stack_value), debug-location !20 + renamable $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, debug-location !23 + CMP32ri8 killed renamable $ebx, 10, implicit-def $eflags, debug-location !23 + renamable $al = SETCCr 12, implicit killed $eflags, implicit killed $eax, implicit-def $eax, debug-location !23 + $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !26 + CFI_INSTRUCTION def_cfa_offset 8, debug-location !26 + RETQ $eax, debug-location !26 + +... diff --git a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir new file mode 100644 index 0000000000000..2396daada876e --- /dev/null +++ b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir @@ -0,0 +1,179 @@ +# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s +# +# The test case was artificially adjusted so that the diamond basic-block +# structure is relevant to the propagation of debug entry values.
+# +# CHECK: ![[ARG_B:.*]] = !DILocalVariable(name: "b" +# CHECK: bb.0.entry +# CHECK: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression() +# CHECK: bb.1.if.then +# CHECK: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression() +# CHECK: $ebx = MOV32rr $esi +# CHECK-NEXT: DBG_VALUE $ebx, $noreg, ![[ARG_B]], !DIExpression() +# CHECK-NEXT: $esi = MOV32ri 5 +# CHECK-NEXT: $ebx = MOV32ri 1 +# CHECK-NEXT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# CHECK: bb.2.if.else +# CHECK: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression() +# CHECK: $ebx = MOV32rr $esi +# CHECK-NEXT: DBG_VALUE $ebx, $noreg, ![[ARG_B]], !DIExpression() +# CHECK-NEXT: $esi = MOV32ri 1 +# CHECK-NEXT: $ebx = MOV32ri 2 +# CHECK-NEXT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# CHECK: bb.3.if.end +# CHECK-NEXT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# +--- | + ; ModuleID = 'test.c' + source_filename = "test.c" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + ; Function Attrs: noinline nounwind uwtable + define dso_local i32 @fn2(i32 %a, i32 %b, i32 %c) local_unnamed_addr !dbg !12 { + entry: + call void @llvm.dbg.value(metadata i32 %a, metadata !16, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.value(metadata i32 %b, metadata !17, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.value(metadata i32 %c, metadata !18, metadata !DIExpression()), !dbg !20 + %add = add nsw i32 %a, 2, !dbg !21 + call void @llvm.dbg.value(metadata i32 %add, metadata !19, metadata !DIExpression()), !dbg !20 + tail call void @fn1(i32 5, i32 6, i32 %add) #3, !dbg !22 + %cmp = icmp slt i32 %b, 17, !dbg !23 + br i1 %cmp, label %if.then, label %if.else, !dbg !25 + + if.then: ; preds = %entry + %add1 = add nsw i32 %b, 7, !dbg !26 + call void @llvm.dbg.value(metadata i32 %add1, metadata !17, metadata !DIExpression()), !dbg !20 + tail call void @fn1(i32 5, i32 %add1, i32 %c) #3, !dbg !28 + br label %if.end, !dbg !29 + + if.else: ; preds = %entry + %add2 = add nuw nsw i32 %b, 1, !dbg !30 + call void @llvm.dbg.value(metadata i32 %add2, metadata !17, metadata !DIExpression()), !dbg !20 + tail call void @fn1(i32 1, i32 %add2, i32 %c) #3, !dbg !32 + br label %if.end + + if.end: ; preds = %if.else, %if.then + %b.addr.0 = phi i32 [ %add1, %if.then ], [ %add2, %if.else ], !dbg !33 + call void @llvm.dbg.value(metadata i32 %b.addr.0, metadata !17, metadata !DIExpression()), !dbg !20 + ret i32 %b.addr.0, !dbg !34 + } + + declare !dbg !4 dso_local void @fn1(i32, i32, i32) local_unnamed_addr + + ; Function Attrs: nounwind readnone speculatable willreturn + declare void @llvm.dbg.value(metadata, metadata, metadata) + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!8, !9, !10} + !llvm.ident = !{!11} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None) + !1 = !DIFile(filename: "test.c", directory: "/dir") + !2 = !{} + !3 = !{!4} + !4 = !DISubprogram(name: "fn1", scope: !1, file: !1, line: 1, type: !5, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2) + !5 = !DISubroutineType(types: !6) + !6 = !{null, !7, !7, !7} + !7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !8 = !{i32 2, !"Dwarf Version", i32 4} + !9 = !{i32 2, !"Debug Info Version", i32 3} + !10 = !{i32 
1, !"wchar_size", i32 4} + !11 = !{!"clang version 10.0.0"} + !12 = distinct !DISubprogram(name: "fn2", scope: !1, file: !1, line: 5, type: !13, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15) + !13 = !DISubroutineType(types: !14) + !14 = !{!7, !7, !7, !7} + !15 = !{!16, !17, !18, !19} + !16 = !DILocalVariable(name: "a", arg: 1, scope: !12, file: !1, line: 5, type: !7) + !17 = !DILocalVariable(name: "b", arg: 2, scope: !12, file: !1, line: 5, type: !7) + !18 = !DILocalVariable(name: "c", arg: 3, scope: !12, file: !1, line: 5, type: !7) + !19 = !DILocalVariable(name: "q", scope: !12, file: !1, line: 7, type: !7) + !20 = !DILocation(line: 0, scope: !12) + !21 = !DILocation(line: 7, column: 15, scope: !12) + !22 = !DILocation(line: 9, column: 5, scope: !12) + !23 = !DILocation(line: 11, column: 11, scope: !24) + !24 = distinct !DILexicalBlock(scope: !12, file: !1, line: 11, column: 9) + !25 = !DILocation(line: 11, column: 9, scope: !12) + !26 = !DILocation(line: 12, column: 13, scope: !27) + !27 = distinct !DILexicalBlock(scope: !24, file: !1, line: 11, column: 17) + !28 = !DILocation(line: 13, column: 8, scope: !27) + !29 = !DILocation(line: 14, column: 5, scope: !27) + !30 = !DILocation(line: 15, column: 13, scope: !31) + !31 = distinct !DILexicalBlock(scope: !24, file: !1, line: 14, column: 12) + !32 = !DILocation(line: 16, column: 7, scope: !31) + !33 = !DILocation(line: 0, scope: !24) + !34 = !DILocation(line: 19, column: 5, scope: !12) + +... +--- +name: fn2 +alignment: 16 +fixedStack: + - { id: 0, type: spill-slot, offset: -24, size: 8, alignment: 8, stack-id: default, + callee-saved-register: '$rbx', callee-saved-restored: true, debug-info-variable: '', + debug-info-expression: '', debug-info-location: '' } + - { id: 1, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default, + callee-saved-register: '$rbp', callee-saved-restored: true, debug-info-variable: '', + debug-info-expression: '', debug-info-location: '' } +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $edi, $edx, $esi, $rbp, $rbx + + DBG_VALUE $edi, $noreg, !16, !DIExpression(), debug-location !20 + DBG_VALUE $esi, $noreg, !17, !DIExpression(), debug-location !20 + DBG_VALUE $edx, $noreg, !18, !DIExpression(), debug-location !20 + frame-setup PUSH64r killed $rbp, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 16 + frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 24 + frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 32 + CFI_INSTRUCTION offset $rbx, -24 + CFI_INSTRUCTION offset $rbp, -16 + $ebp = MOV32rr $edx + DBG_VALUE $ebp, $noreg, !18, !DIExpression(), debug-location !20 + renamable $edi = KILL $edi, implicit-def $rdi + DBG_VALUE $edi, $noreg, !16, !DIExpression(), debug-location !20 + renamable $edx = LEA64_32r killed renamable $rdi, 1, $noreg, 2, $noreg, debug-location !21 + DBG_VALUE $edx, $noreg, !19, !DIExpression(), debug-location !20 + $edi = MOV32ri 5, debug-location !22 + CMP32ri8 renamable $ebp, 16, implicit-def $eflags, debug-location !23 + JCC_1 %bb.2, 15, implicit killed $eflags, debug-location !25 + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $ebp, $ebx, $esi + + $ebx = MOV32rr $esi + DBG_VALUE $ebx, $noreg, !17, !DIExpression(), debug-location !20 + $esi = MOV32ri 5, debug-location !28 + $ebx = MOV32ri 1 + JMP_1 %bb.3 + + 
bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $ebp, $ebx, $esi + + $ebx = MOV32rr $esi + DBG_VALUE $ebx, $noreg, !17, !DIExpression(), debug-location !20 + $esi = MOV32ri 1, debug-location !32 + $ebx = MOV32ri 2 + + bb.3.if.end: + liveins: $ebx, $edi, $ebp + + $esi = MOV32rr $ebx, debug-location !33 + $edx = MOV32rr killed $ebp, debug-location !33 + CALL64pcrel32 @fn1, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit-def $rsp, implicit-def $ssp, debug-location !33 + DBG_VALUE $ebx, $noreg, !17, !DIExpression(), debug-location !20 + $eax = MOV32rr killed $ebx, debug-location !34 + $rsp = frame-destroy ADD64ri8 $rsp, 8, implicit-def dead $eflags, debug-location !34 + CFI_INSTRUCTION def_cfa_offset 24, debug-location !34 + $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !34 + CFI_INSTRUCTION def_cfa_offset 16, debug-location !34 + $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !34 + CFI_INSTRUCTION def_cfa_offset 8, debug-location !34 + RETQ killed $eax, debug-location !34 + +... diff --git a/llvm/test/DebugInfo/MIR/X86/kill-entry-value-after-diamond-bbs.mir b/llvm/test/DebugInfo/MIR/X86/kill-entry-value-after-diamond-bbs.mir new file mode 100644 index 0000000000000..0109dc47ef36d --- /dev/null +++ b/llvm/test/DebugInfo/MIR/X86/kill-entry-value-after-diamond-bbs.mir @@ -0,0 +1,180 @@ +# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s +# +# The test case was artificially adjusted so that the diamond basic-block +# structure is relevant to the clobbering of debug entry values. +# +# CHECK: ![[ARG_B:.*]] = !DILocalVariable(name: "b" +# CHECK: bb.0.entry +# CHECK: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression() +# CHECK: bb.1.if.then +# CHECK: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression() +# CHECK: $ebx = MOV32rr $esi +# CHECK-NEXT: DBG_VALUE $ebx, $noreg, ![[ARG_B]], !DIExpression() +# CHECK-NEXT: $esi = MOV32ri 5 +# CHECK-NEXT: $ebx = MOV32ri 1 +# CHECK-NEXT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# CHECK: bb.2.if.else +# CHECK: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression() +# CHECK: $ebp = MOV32rr $esi +# CHECK: DBG_VALUE $ebp, $noreg, ![[ARG_B]], !DIExpression() +# CHECK-NEXT: $esi = MOV32ri 1 +# CHECK-NEXT: $ebp = MOV32ri 2 +# CHECK-NEXT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# CHECK: bb.3.if.end +# CHECK-NOT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# +--- | + ; ModuleID = 'test.c' + source_filename = "test.c" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + ; Function Attrs: noinline nounwind uwtable + define dso_local i32 @fn2(i32 %a, i32 %b, i32 %c) local_unnamed_addr !dbg !12 { + entry: + call void @llvm.dbg.value(metadata i32 %a, metadata !16, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.value(metadata i32 %b, metadata !17, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.value(metadata i32 %c, metadata !18, metadata !DIExpression()), !dbg !20 + %add = add nsw i32 %a, 2, !dbg !21 + call void @llvm.dbg.value(metadata i32 %add, metadata !19, metadata !DIExpression()), !dbg !20 + tail call void @fn1(i32 5, i32 6, i32 %add), !dbg !22 + %cmp = icmp slt i32 %b, 17, !dbg !23 + br i1 %cmp, label %if.then, label %if.else, !dbg !25 + + if.then: ; preds = %entry + %add1 = add nsw i32 %b, 7, 
!dbg !26 + call void @llvm.dbg.value(metadata i32 %add1, metadata !17, metadata !DIExpression()), !dbg !20 + tail call void @fn1(i32 5, i32 %add1, i32 %c), !dbg !28 + br label %if.end, !dbg !29 + + if.else: ; preds = %entry + %add2 = add nuw nsw i32 %b, 1, !dbg !30 + call void @llvm.dbg.value(metadata i32 %add2, metadata !17, metadata !DIExpression()), !dbg !20 + tail call void @fn1(i32 1, i32 %add2, i32 %c), !dbg !32 + br label %if.end + + if.end: ; preds = %if.else, %if.then + %b.addr.0 = phi i32 [ %add1, %if.then ], [ %add2, %if.else ], !dbg !33 + call void @llvm.dbg.value(metadata i32 %b.addr.0, metadata !17, metadata !DIExpression()), !dbg !20 + ret i32 %b.addr.0, !dbg !34 + } + + declare !dbg !4 dso_local void @fn1(i32, i32, i32) local_unnamed_addr + + ; Function Attrs: nounwind readnone speculatable willreturn + declare void @llvm.dbg.value(metadata, metadata, metadata) + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!8, !9, !10} + !llvm.ident = !{!11} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None) + !1 = !DIFile(filename: "test.c", directory: "/dir") + !2 = !{} + !3 = !{!4} + !4 = !DISubprogram(name: "fn1", scope: !1, file: !1, line: 1, type: !5, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2) + !5 = !DISubroutineType(types: !6) + !6 = !{null, !7, !7, !7} + !7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !8 = !{i32 2, !"Dwarf Version", i32 4} + !9 = !{i32 2, !"Debug Info Version", i32 3} + !10 = !{i32 1, !"wchar_size", i32 4} + !11 = !{!"clang version 10.0.0"} + !12 = distinct !DISubprogram(name: "fn2", scope: !1, file: !1, line: 5, type: !13, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15) + !13 = !DISubroutineType(types: !14) + !14 = !{!7, !7, !7, !7} + !15 = !{!16, !17, !18, !19} + !16 = !DILocalVariable(name: "a", arg: 1, scope: !12, file: !1, line: 5, type: !7) + !17 = !DILocalVariable(name: "b", arg: 2, scope: !12, file: !1, line: 5, type: !7) + !18 = !DILocalVariable(name: "c", arg: 3, scope: !12, file: !1, line: 5, type: !7) + !19 = !DILocalVariable(name: "q", scope: !12, file: !1, line: 7, type: !7) + !20 = !DILocation(line: 0, scope: !12) + !21 = !DILocation(line: 7, column: 15, scope: !12) + !22 = !DILocation(line: 9, column: 5, scope: !12) + !23 = !DILocation(line: 11, column: 11, scope: !24) + !24 = distinct !DILexicalBlock(scope: !12, file: !1, line: 11, column: 9) + !25 = !DILocation(line: 11, column: 9, scope: !12) + !26 = !DILocation(line: 12, column: 13, scope: !27) + !27 = distinct !DILexicalBlock(scope: !24, file: !1, line: 11, column: 17) + !28 = !DILocation(line: 13, column: 8, scope: !27) + !29 = !DILocation(line: 14, column: 5, scope: !27) + !30 = !DILocation(line: 15, column: 13, scope: !31) + !31 = distinct !DILexicalBlock(scope: !24, file: !1, line: 14, column: 12) + !32 = !DILocation(line: 16, column: 7, scope: !31) + !33 = !DILocation(line: 0, scope: !24) + !34 = !DILocation(line: 19, column: 5, scope: !12) + +... 
+--- +name: fn2 +alignment: 16 +fixedStack: + - { id: 0, type: spill-slot, offset: -24, size: 8, alignment: 8, stack-id: default, + callee-saved-register: '$rbx', callee-saved-restored: true, debug-info-variable: '', + debug-info-expression: '', debug-info-location: '' } + - { id: 1, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default, + callee-saved-register: '$rbp', callee-saved-restored: true, debug-info-variable: '', + debug-info-expression: '', debug-info-location: '' } +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $edi, $edx, $esi, $rbp, $rbx + + DBG_VALUE $edi, $noreg, !16, !DIExpression(), debug-location !20 + DBG_VALUE $esi, $noreg, !17, !DIExpression(), debug-location !20 + DBG_VALUE $edx, $noreg, !18, !DIExpression(), debug-location !20 + frame-setup PUSH64r killed $rbp, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 16 + frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 24 + frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 32 + CFI_INSTRUCTION offset $rbx, -24 + CFI_INSTRUCTION offset $rbp, -16 + $ebp = MOV32rr $edx + DBG_VALUE $ebp, $noreg, !18, !DIExpression(), debug-location !20 + renamable $edi = KILL $edi, implicit-def $rdi + DBG_VALUE $edi, $noreg, !16, !DIExpression(), debug-location !20 + renamable $edx = LEA64_32r killed renamable $rdi, 1, $noreg, 2, $noreg, debug-location !21 + DBG_VALUE $edx, $noreg, !19, !DIExpression(), debug-location !20 + $edi = MOV32ri 5, debug-location !22 + CMP32ri8 renamable $ebp, 16, implicit-def $eflags, debug-location !23 + JCC_1 %bb.2, 15, implicit killed $eflags, debug-location !25 + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $ebp, $ebx, $esi + + $ebx = MOV32rr $esi + DBG_VALUE $ebx, $noreg, !17, !DIExpression(), debug-location !20 + $esi = MOV32ri 5, debug-location !28 + $ebx = MOV32ri 1 + JMP_1 %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $ebp, $ebx, $esi + + $ebp = MOV32rr $esi + DBG_VALUE $ebp, $noreg, !17, !DIExpression(), debug-location !20 + $esi = MOV32ri 1, debug-location !32 + $ebp = MOV32ri 2 + + bb.3.if.end: + liveins: $ebx, $edi, $ebp + + $esi = MOV32rr $ebx, debug-location !33 + $edx = MOV32rr killed $ebp, debug-location !33 + CALL64pcrel32 @fn1, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit-def $rsp, implicit-def $ssp, debug-location !33 + DBG_VALUE $ebx, $noreg, !17, !DIExpression(), debug-location !20 + $eax = MOV32rr killed $ebx, debug-location !34 + $rsp = frame-destroy ADD64ri8 $rsp, 8, implicit-def dead $eflags, debug-location !34 + CFI_INSTRUCTION def_cfa_offset 24, debug-location !34 + $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !34 + CFI_INSTRUCTION def_cfa_offset 16, debug-location !34 + $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !34 + CFI_INSTRUCTION def_cfa_offset 8, debug-location !34 + RETQ killed $eax, debug-location !34 + +... 
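+# For reference, a C source of roughly this shape regenerates the diamond
+# (a hedged reconstruction from the IR and line numbers above, mirroring the
+# companion propagate-entry-value-cross-bbs.mir test; the MIR body itself was
+# hand-adjusted afterwards, so it does not correspond line-for-line):
+#
+# extern void fn1 (int, int, int);
+# __attribute__((noinline))
+# int
+# fn2 (int a, int b, int c) {
+#   int q = 2 + a;
+#   fn1 (5, 6, q);
+#   if (b < 17) {
+#     b = b + 7;
+#     fn1 (5, b, c);
+#   } else {
+#     b = b + 1;
+#     fn1 (1, b, c);
+#   }
+#   return b;
+# }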
+ diff --git a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir new file mode 100644 index 0000000000000..86b1cddaa462b --- /dev/null +++ b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir @@ -0,0 +1,184 @@ +# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s +# +#extern void fn1 (int, int, int); +#__attribute__((noinline)) +#int +#fn2 (int a, int b, int c) { +# int q = 2 + a; +# fn1 (5, 6, q); +# if (b < 17) { +# b = b + 7; +# fn1 (5, b, q); +# } else { +# b = b + 1; +# fn1 (1, b, q); +# } +# return b; +#} +# CHECK: ![[ARG_C:.*]] = !DILocalVariable(name: "c" +# CHECK: bb.0.entry: +# CHECK: DBG_VALUE $edx, $noreg, ![[ARG_C]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# CHECK: bb.1.if.then: +# CHECK: DBG_VALUE $edx, $noreg, ![[ARG_C]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# CHECK: bb.2.if.else: +# CHECK: DBG_VALUE $edx, $noreg, ![[ARG_C]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# CHECK: bb.3.if.end: +# CHECK: DBG_VALUE $edx, $noreg, ![[ARG_C]], !DIExpression(DW_OP_LLVM_entry_value, 1) +# +--- | + ; ModuleID = 'test.c' + source_filename = "test.c" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + ; Function Attrs: noinline nounwind uwtable + define dso_local i32 @fn2(i32 %a, i32 %b, i32 %c) local_unnamed_addr !dbg !12 { + entry: + call void @llvm.dbg.value(metadata i32 %a, metadata !16, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.value(metadata i32 %b, metadata !17, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.value(metadata i32 %c, metadata !18, metadata !DIExpression()), !dbg !20 + %add = add nsw i32 %a, 2, !dbg !21 + call void @llvm.dbg.value(metadata i32 %add, metadata !19, metadata !DIExpression()), !dbg !20 + tail call void @fn1(i32 5, i32 6, i32 %add), !dbg !22 + %cmp = icmp slt i32 %b, 17, !dbg !23 + br i1 %cmp, label %if.then, label %if.else, !dbg !25 + + if.then: ; preds = %entry + %add1 = add nsw i32 %b, 7, !dbg !26 + call void @llvm.dbg.value(metadata i32 %add1, metadata !17, metadata !DIExpression()), !dbg !20 + tail call void @fn1(i32 5, i32 %add1, i32 %add), !dbg !28 + br label %if.end, !dbg !29 + + if.else: ; preds = %entry + %add2 = add nuw nsw i32 %b, 1, !dbg !30 + call void @llvm.dbg.value(metadata i32 %add2, metadata !17, metadata !DIExpression()), !dbg !20 + tail call void @fn1(i32 1, i32 %add2, i32 %add), !dbg !32 + br label %if.end + + if.end: ; preds = %if.else, %if.then + %b.addr.0 = phi i32 [ %add1, %if.then ], [ %add2, %if.else ], !dbg !33 + call void @llvm.dbg.value(metadata i32 %b.addr.0, metadata !17, metadata !DIExpression()), !dbg !20 + ret i32 %b.addr.0, !dbg !34 + } + + declare !dbg !4 dso_local void @fn1(i32, i32, i32) local_unnamed_addr + + ; Function Attrs: nounwind readnone speculatable willreturn + declare void @llvm.dbg.value(metadata, metadata, metadata) + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!8, !9, !10} + !llvm.ident = !{!11} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None) + !1 = !DIFile(filename: "test.c", directory: "/") + !2 = !{} + !3 = !{!4} + !4 = !DISubprogram(name: "fn1", scope: !1, file: !1, line: 1, type: !5, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2) + !5 = 
!DISubroutineType(types: !6) + !6 = !{null, !7, !7, !7} + !7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !8 = !{i32 2, !"Dwarf Version", i32 4} + !9 = !{i32 2, !"Debug Info Version", i32 3} + !10 = !{i32 1, !"wchar_size", i32 4} + !11 = !{!"clang version 10.0.0"} + !12 = distinct !DISubprogram(name: "fn2", scope: !1, file: !1, line: 5, type: !13, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15) + !13 = !DISubroutineType(types: !14) + !14 = !{!7, !7, !7, !7} + !15 = !{!16, !17, !18, !19} + !16 = !DILocalVariable(name: "a", arg: 1, scope: !12, file: !1, line: 5, type: !7) + !17 = !DILocalVariable(name: "b", arg: 2, scope: !12, file: !1, line: 5, type: !7) + !18 = !DILocalVariable(name: "c", arg: 3, scope: !12, file: !1, line: 5, type: !7) + !19 = !DILocalVariable(name: "q", scope: !12, file: !1, line: 7, type: !7) + !20 = !DILocation(line: 0, scope: !12) + !21 = !DILocation(line: 7, column: 15, scope: !12) + !22 = !DILocation(line: 9, column: 5, scope: !12) + !23 = !DILocation(line: 11, column: 11, scope: !24) + !24 = distinct !DILexicalBlock(scope: !12, file: !1, line: 11, column: 9) + !25 = !DILocation(line: 11, column: 9, scope: !12) + !26 = !DILocation(line: 12, column: 13, scope: !27) + !27 = distinct !DILexicalBlock(scope: !24, file: !1, line: 11, column: 17) + !28 = !DILocation(line: 13, column: 8, scope: !27) + !29 = !DILocation(line: 14, column: 5, scope: !27) + !30 = !DILocation(line: 15, column: 13, scope: !31) + !31 = distinct !DILexicalBlock(scope: !24, file: !1, line: 14, column: 12) + !32 = !DILocation(line: 16, column: 7, scope: !31) + !33 = !DILocation(line: 0, scope: !24) + !34 = !DILocation(line: 19, column: 5, scope: !12) + +... 
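+# The callSites table in the MIR body below records, for each of the two
+# calls to fn1 (one in bb.0, one in bb.3), which physical registers forward
+# the arguments (fwdArgRegs: $edi, $esi, $edx); -debug-entry-values relies on
+# such call-site information when reasoning about parameter values at calls.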
+--- +name: fn2 +alignment: 16 +callSites: + - { bb: 0, offset: 20, fwdArgRegs: + - { arg: 0, reg: '$edi' } + - { arg: 1, reg: '$esi' } + - { arg: 2, reg: '$edx' } } + - { bb: 3, offset: 2, fwdArgRegs: + - { arg: 0, reg: '$edi' } + - { arg: 1, reg: '$esi' } + - { arg: 2, reg: '$edx' } } +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $edi, $esi, $rbp, $rbx + + DBG_VALUE $edi, $noreg, !16, !DIExpression(), debug-location !20 + DBG_VALUE $esi, $noreg, !17, !DIExpression(), debug-location !20 + DBG_VALUE $edx, $noreg, !18, !DIExpression(), debug-location !20 + frame-setup PUSH64r killed $rbp, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 16 + frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 24 + frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 32 + CFI_INSTRUCTION offset $rbx, -24 + CFI_INSTRUCTION offset $rbp, -16 + $ebx = MOV32rr $esi + DBG_VALUE $ebx, $noreg, !17, !DIExpression(), debug-location !20 + $ebp = MOV32rr $edi + DBG_VALUE $ebp, $noreg, !16, !DIExpression(), debug-location !20 + renamable $ebp = nsw ADD32ri8 killed renamable $ebp, 2, implicit-def dead $eflags, debug-location !21 + DBG_VALUE $ebp, $noreg, !19, !DIExpression(), debug-location !20 + $edi = MOV32ri 5, debug-location !22 + $esi = MOV32ri 6, debug-location !22 + $edx = MOV32rr $ebp, debug-location !22 + CALL64pcrel32 @fn1, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit killed $edx, implicit-def $rsp, implicit-def $ssp, debug-location !22 + CMP32ri8 renamable $ebx, 16, implicit-def $eflags, debug-location !23 + JCC_1 %bb.2, 15, implicit killed $eflags, debug-location !25 + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $ebp, $ebx + + renamable $ebx = nsw ADD32ri8 killed renamable $ebx, 7, implicit-def dead $eflags, debug-location !26 + DBG_VALUE $ebx, $noreg, !17, !DIExpression(), debug-location !20 + $edi = MOV32ri 5, debug-location !28 + JMP_1 %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $ebp, $ebx + + renamable $ebx = nuw nsw ADD32ri8 killed renamable $ebx, 1, implicit-def dead $eflags, debug-location !30 + DBG_VALUE $ebx, $noreg, !17, !DIExpression(), debug-location !20 + $edi = MOV32ri 1, debug-location !32 + + bb.3.if.end: + liveins: $ebx, $edi, $ebp + + $esi = MOV32rr $ebx, debug-location !33 + $edx = MOV32rr killed $ebp, debug-location !33 + CALL64pcrel32 @fn1, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit killed $edx, implicit-def $rsp, implicit-def $ssp, debug-location !33 + DBG_VALUE $ebx, $noreg, !17, !DIExpression(), debug-location !20 + $eax = MOV32rr killed $ebx, debug-location !34 + $rsp = frame-destroy ADD64ri8 $rsp, 8, implicit-def dead $eflags, debug-location !34 + CFI_INSTRUCTION def_cfa_offset 24, debug-location !34 + $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !34 + CFI_INSTRUCTION def_cfa_offset 16, debug-location !34 + $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !34 + CFI_INSTRUCTION def_cfa_offset 8, debug-location !34 + RETQ killed $eax, debug-location !34 + +... 
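+# Note: $edx holds "c" only on entry; it is clobbered in bb.0 when the
+# arguments for the first call to fn1 are set up, and "c" is never reloaded
+# into another location. Describing it with
+# DIExpression(DW_OP_LLVM_entry_value, 1) in every block, as checked above,
+# is therefore the only remaining way to recover its value.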
diff --git a/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll b/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll index 24ba037762a9e..4a767aaf62355 100644 --- a/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll +++ b/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll @@ -1,19 +1,15 @@ -; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+relax %s -o - \ -; RUN: | llvm-readobj -r | FileCheck -check-prefix=RELAX %s -; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+relax %s -o - \ -; RUN: | llvm-dwarfdump --debug-frame - 2>&1 \ +; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+relax %s -o %t.o +; RUN: llvm-readobj -r %t.o | FileCheck -check-prefix=RELAX %s +; RUN: llvm-dwarfdump --debug-frame %t.o 2>&1 \ ; RUN: | FileCheck -check-prefix=RELAX-DWARFDUMP %s ; -; RELAX: Section{{.*}}.rela.{{eh|debug}}_frame { -; RELAX-NOT: {{[}]}} -; RELAX-NOT: 0x0 R_RISCV_ADD32 -; RELAX-NOT: 0x0 R_RISCV_SUB32 -; RELAX-NOT: {{[}]}} -; RELAX: 0x20 R_RISCV_ADD32 -; RELAX: 0x20 R_RISCV_SUB32 -; RELAX-NOT: {{[}]}} -; RELAX: 0x39 R_RISCV_SET6 -; RELAX: 0x39 R_RISCV_SUB6 +; RELAX: Section ({{.*}}) .rela.eh_frame { +; RELAX-NEXT: 0x1C R_RISCV_32_PCREL - 0x0 +; RELAX-NEXT: 0x20 R_RISCV_ADD32 - 0x0 +; RELAX-NEXT: 0x20 R_RISCV_SUB32 - 0x0 +; RELAX-NOT: } +; RELAX: 0x39 R_RISCV_SET6 - 0x0 +; RELAX-NEXT: 0x39 R_RISCV_SUB6 - 0x0 ; ; RELAX-DWARFDUMP-NOT: error: failed to compute relocation ; RELAX-DWARFDUMP: CIE diff --git a/llvm/test/DebugInfo/X86/debug-info-template-align.ll b/llvm/test/DebugInfo/X86/debug-info-template-align.ll new file mode 100644 index 0000000000000..160d88cd0cc94 --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-info-template-align.ll @@ -0,0 +1,63 @@ +; RUN: llc %s -filetype=obj -o - | llvm-dwarfdump -v - | FileCheck %s + +; C++ source to regenerate: + +;typedef char __attribute__((__aligned__(64))) alchar; + +;int main(){ +; alchar newChar; +;} +; $ clang++ -O0 -g -gdwarf-5 debug-info-template-align.cpp -c + +; CHECK: .debug_abbrev contents: + +; CHECK: [5] DW_TAG_typedef DW_CHILDREN_no +; CHECK: DW_AT_alignment DW_FORM_udata + +; CHECK: .debug_info contents: + +;CHECK: DW_TAG_typedef [5] +;CHECK: DW_AT_name {{.*}} "alchar" +;CHECK-NEXT: DW_AT_alignment [DW_FORM_udata] (64) + + +; ModuleID = '/dir/test.cpp' +source_filename = "/dir/test.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline norecurse nounwind optnone uwtable +define dso_local i32 @main() #0 !dbg !7 { +entry: + %newChar = alloca i8, align 64 + call void @llvm.dbg.declare(metadata i8* %newChar, metadata !12, metadata !DIExpression()), !dbg !15 + ret i32 0, !dbg !16 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { noinline norecurse nounwind optnone uwtable } +attributes #1 = { nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 10.0.0 ", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "/dir/test.cpp", directory: "/dir/", checksumkind: CSK_MD5, checksum: "872e252efdfcb9480b4bfaf8437f58ab") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 10.0.0 "} +!7 = distinct !DISubprogram(name: 
"main", scope: !8, file: !8, line: 12, type: !9, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DIFile(filename: "test.cpp", directory: "/dir", checksumkind: CSK_MD5, checksum: "872e252efdfcb9480b4bfaf8437f58ab") +!9 = !DISubroutineType(types: !10) +!10 = !{!11} +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !DILocalVariable(name: "newChar", scope: !7, file: !8, line: 13, type: !13) +!13 = !DIDerivedType(tag: DW_TAG_typedef, name: "alchar", file: !8, line: 10, baseType: !14, align: 512) +!14 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!15 = !DILocation(line: 13, column: 10, scope: !7) +!16 = !DILocation(line: 14, column: 1, scope: !7) diff --git a/llvm/test/DebugInfo/X86/debug_addr.ll b/llvm/test/DebugInfo/X86/debug_addr.ll index e6dbe7d029d95..55e24b2764c18 100644 --- a/llvm/test/DebugInfo/X86/debug_addr.ll +++ b/llvm/test/DebugInfo/X86/debug_addr.ll @@ -28,7 +28,7 @@ ; DWARF5: .debug_info contents: ; DWARF5: Compile Unit:{{.*}}version = 0x0005 ; DWARF5-NOT: Compile Unit -; DWARF5: DW_TAG_compile_unit +; DWARF5: DW_TAG_skeleton_unit ; DWARF5-NOT: DW_TAG_{{.*}} ; DWARF5: DW_AT_GNU_dwo_name{{.*}}test.dwo ; DWARF5: DW_AT_addr_base{{.*}}0x00000008 diff --git a/llvm/test/DebugInfo/X86/dwarfdump-debug-loc-simple.test b/llvm/test/DebugInfo/X86/dwarfdump-debug-loc-simple.test index 15d688ea72d45..29d27982acce6 100644 --- a/llvm/test/DebugInfo/X86/dwarfdump-debug-loc-simple.test +++ b/llvm/test/DebugInfo/X86/dwarfdump-debug-loc-simple.test @@ -4,19 +4,19 @@ Note: the input file was generated from Inputs/dwarfdump-test-loc-list-32bit.elf CHECK: .debug_info CHECK: DW_AT_name{{.*}}"f" CHECK: DW_AT_location{{.*}}([[F_LOC:0x[0-9a-f]*]] -CHECK-NEXT: [0x00000000, 0x00000023): DW_OP_reg1 ECX -CHECK-NEXT: [0x00000023, 0x0000005d): DW_OP_breg5 EBP-16) +CHECK-NEXT: [0x00000000, 0x00000023) ".text": DW_OP_reg1 ECX +CHECK-NEXT: [0x00000023, 0x0000005d) ".text": DW_OP_breg5 EBP-16) CHECK: DW_AT_name{{.*}}"g" CHECK: DW_AT_location{{.*}}([[G_LOC:0x[0-9a-f]*]] -CHECK-NEXT: [0x00000000, 0x00000020): DW_OP_reg0 EAX -CHECK-NEXT: [0x00000020, 0x0000005d): DW_OP_breg5 EBP-12) +CHECK-NEXT: [0x00000000, 0x00000020) ".text": DW_OP_reg0 EAX +CHECK-NEXT: [0x00000020, 0x0000005d) ".text": DW_OP_breg5 EBP-12) CHECK: .debug_loc contents: CHECK-NEXT: [[F_LOC]]: this is actually the wrong location due to PR14763, but that doesn't matter for the purposes of testing dwarfdump -CHECK-NEXT: (0x00000000, 0x00000023): DW_OP_reg1 ECX -CHECK-NEXT: (0x00000023, 0x0000005d): DW_OP_breg5 EBP-16 +CHECK-NEXT: (0x00000000, 0x00000023) ".text": DW_OP_reg1 ECX +CHECK-NEXT: (0x00000023, 0x0000005d) ".text": DW_OP_breg5 EBP-16 CHECK: [[G_LOC]]: -CHECK-NEXT: (0x00000000, 0x00000020): DW_OP_reg0 EAX -CHECK-NEXT: (0x00000020, 0x0000005d): DW_OP_breg5 EBP-12 +CHECK-NEXT: (0x00000000, 0x00000020) ".text": DW_OP_reg0 EAX +CHECK-NEXT: (0x00000020, 0x0000005d) ".text": DW_OP_breg5 EBP-12 diff --git a/llvm/test/DebugInfo/X86/dwarfdump-rnglists-dwarf64.s b/llvm/test/DebugInfo/X86/dwarfdump-rnglists-dwarf64.s index f8395818734ed..19bbd77586d8f 100644 --- a/llvm/test/DebugInfo/X86/dwarfdump-rnglists-dwarf64.s +++ b/llvm/test/DebugInfo/X86/dwarfdump-rnglists-dwarf64.s @@ -196,14 +196,14 @@ Range1_end: # CHECK-NOT: Compile Unit: # CHECK: DW_TAG_compile_unit # CHECK-NEXT: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x00000014) -# CHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = 0x00000020 +# CHECK-NEXT: DW_AT_ranges 
[DW_FORM_rnglistx] (indexed (0x1) rangelist = 0x00000034 # CHECK-NEXT: [0x0000002a, 0x00000034) ".text") # CHECK: .debug_info.dwo contents: # CHECK: Compile Unit: # CHECK-NOT: contents: # CHECK: DW_TAG_compile_unit -# CHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = 0x00000011 +# CHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = 0x00000025 # CHECK-NEXT: [0x0000002a, 0x00000034)) #ERR: error: parsing a range list table: did not detect a valid list table with base = 0x8 diff --git a/llvm/test/DebugInfo/X86/dwarfdump-rnglists.s b/llvm/test/DebugInfo/X86/dwarfdump-rnglists.s index 7886374c4d637..0d6898df170bf 100644 --- a/llvm/test/DebugInfo/X86/dwarfdump-rnglists.s +++ b/llvm/test/DebugInfo/X86/dwarfdump-rnglists.s @@ -192,14 +192,14 @@ Range1_end: # CHECK-NOT: Compile Unit: # CHECK: DW_TAG_compile_unit # CHECK-NEXT: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c) -# CHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = 0x00000018 +# CHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = 0x00000024 # CHECK-NEXT: [0x0000002a, 0x00000034) ".text") # CHECK: .debug_info.dwo contents: # CHECK: Compile Unit: # CHECK-NOT: contents: # CHECK: DW_TAG_compile_unit -# CHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = 0x00000009 +# CHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = 0x00000015 # CHECK-NEXT: [0x0000002a, 0x00000034)) #ERR: error: parsing a range list table: did not detect a valid list table with base = 0x8 diff --git a/llvm/test/DebugInfo/X86/split-dwarf-v5-ranges.ll b/llvm/test/DebugInfo/X86/split-dwarf-v5-ranges.ll index 74e94643b9c08..1761c4aa8fe4b 100644 --- a/llvm/test/DebugInfo/X86/split-dwarf-v5-ranges.ll +++ b/llvm/test/DebugInfo/X86/split-dwarf-v5-ranges.ll @@ -3,7 +3,7 @@ ; CHECK: .debug_info contents: ; CHECK: .debug_info.dwo contents: -; CHECK: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x0) rangelist = 0x00000004 +; CHECK: DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x0) rangelist = 0x00000010 ; CHECK: [0x0000000000000001, 0x000000000000000c) ".text" ; CHECK: [0x000000000000000e, 0x0000000000000013) ".text") diff --git a/llvm/test/DebugInfo/X86/string-offsets-table-order.ll b/llvm/test/DebugInfo/X86/string-offsets-table-order.ll index e39f70c0e3099..ab54930fe916a 100644 --- a/llvm/test/DebugInfo/X86/string-offsets-table-order.ll +++ b/llvm/test/DebugInfo/X86/string-offsets-table-order.ll @@ -12,11 +12,11 @@ ; in different order. 
; CHECK: .debug_info contents:
-; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_skeleton_unit
; CHECK: DW_AT_comp_dir [DW_FORM_strx1] (indexed (00000000) string = "X3")
-; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_skeleton_unit
; CHECK: DW_AT_comp_dir [DW_FORM_strx1] (indexed (00000001) string = "X2")
-; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_skeleton_unit
; CHECK: DW_AT_comp_dir [DW_FORM_strx1] (indexed (00000002) string = "X1")
; CHECK: .debug_info.dwo contents:
diff --git a/llvm/test/DebugInfo/X86/string-offsets-table.ll b/llvm/test/DebugInfo/X86/string-offsets-table.ll
index 9960fd833ed98..be960d1e017c8 100644
--- a/llvm/test/DebugInfo/X86/string-offsets-table.ll
+++ b/llvm/test/DebugInfo/X86/string-offsets-table.ll
@@ -56,7 +56,7 @@
; SPLIT: .debug_info contents:
; SPLIT-NEXT: 0x00000000: Compile Unit:{{.*}}DW_UT_skeleton
; SPLIT-NOT: contents:
-; SPLIT: DW_TAG_compile_unit
+; SPLIT: DW_TAG_skeleton_unit
; SPLIT-NOT: {{DW_TAG|contents:}}
; SPLIT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x00000008)
; SPLIT: DW_AT_comp_dir [DW_FORM_strx1] (indexed (00000000) string = "/home/test")
diff --git a/llvm/test/DebugInfo/debugmacinfo-dwo.test b/llvm/test/DebugInfo/debugmacinfo-dwo.test
new file mode 100644
index 0000000000000..7c5f7ef56531d
--- /dev/null
+++ b/llvm/test/DebugInfo/debugmacinfo-dwo.test
@@ -0,0 +1,20 @@
+RUN: llvm-dwarfdump -debug-macro %p/Inputs/dwarfdump-macro.dwo \
+RUN: | FileCheck %s -check-prefix TEST_MACINFODWO
+
+; This test verifies that the llvm-dwarfdump tool knows how to read the
+; .debug_macinfo.dwo section.
+; dwarfdump-macro.dwo has been generated from Inputs/dwarfdump-macro.cc
+; clang++ -c -O0 -DM3=Value3 -include dwarfdump-macro-cmd.h dwarfdump-macro.cc -fdebug-macro -gsplit-dwarf
+
+TEST_MACINFODWO: .debug_macinfo.dwo contents:
+TEST_MACINFODWO: DW_MACINFO_start_file - lineno: 0 filenum: 1
+TEST_MACINFODWO: DW_MACINFO_start_file - lineno: 0 filenum: 2
+TEST_MACINFODWO: DW_MACINFO_define - lineno: 1 macro: M4 Value4
+TEST_MACINFODWO: DW_MACINFO_end_file
+TEST_MACINFODWO: DW_MACINFO_define - lineno: 1 macro: M1 Value1
+TEST_MACINFODWO: DW_MACINFO_start_file - lineno: 2 filenum: 3
+TEST_MACINFODWO: DW_MACINFO_undef - lineno: 4 macro: M1
+TEST_MACINFODWO: DW_MACINFO_define - lineno: 5 macro: M1 NewValue1
+TEST_MACINFODWO: DW_MACINFO_end_file
+TEST_MACINFODWO: DW_MACINFO_define - lineno: 3 macro: M2(x,y) ((x)+(y)* Value2)
+TEST_MACINFODWO: DW_MACINFO_end_file
diff --git a/llvm/test/DebugInfo/symbolize-build-id.test b/llvm/test/DebugInfo/symbolize-build-id.test
new file mode 100644
index 0000000000000..40221ae9e0574
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-build-id.test
@@ -0,0 +1,28 @@
+# RUN: yaml2obj %s -o %t
+
+# RUN: llvm-symbolizer --debug-file-directory=/non-existent --obj=%t 0x20112f | FileCheck --check-prefix=UNKNOWN %s
+
+# UNKNOWN: ??
+# UNKNOWN-NEXT: ??:0:0 + +# RUN: llvm-symbolizer --debug-file-directory=%p/Inputs --obj=%t 0x20112f | FileCheck --check-prefix=FOUND %s + +# FOUND: main +# FOUND-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .note.gnu.build-id + Type: SHT_NOTE + Flags: [ SHF_ALLOC ] + Content: 040000000800000003000000474e5500abb50d82b6bdc861 +ProgramHeaders: + - Type: PT_NOTE + Flags: [ PF_R ] + Sections: + - Section: .note.gnu.build-id diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg-blockaddress.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg-blockaddress.ll deleted file mode 100644 index faf60f3acdb30..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg-blockaddress.ll +++ /dev/null @@ -1,23 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 -S < %s | FileCheck %s - -define i8* @simp1(i32 %x) { -; CHECK-LABEL: @simp1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 42 -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[CMP]], i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 1 to i8*) -; CHECK-NEXT: ret i8* [[ADDR]] -; -entry: - %cmp = icmp slt i32 %x, 42 - %addr = select i1 %cmp, i8* blockaddress(@simp1, %bb1), i8* blockaddress(@simp1, %bb2) - ret i8* %addr - -bb1: - ret i8* null - -bb2: - ret i8* null -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg1.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg1.ll deleted file mode 100644 index cb0f82e37573a..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg1.ll +++ /dev/null @@ -1,90 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 -S < %s | FileCheck %s - -define i32 @simp1() { -; CHECK-LABEL: @simp1( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 10 -; -entry: - br i1 true, label %if.then, label %if.else - -if.then: - ret i32 10 - -if.else: - ret i32 12 -} - -define i32 @simp2() { -; CHECK-LABEL: @simp2( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 200 -; -entry: - br i1 false, label %if.then, label %if.else - -if.then: - ret i32 99 - -if.else: - ret i32 200 -} - -declare void @foo(i64) - -define i64 @merge_into_predecessor(i64 %a, i64 %b) { -; CHECK-LABEL: @merge_into_predecessor( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[R:%.*]] = add i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: call void @foo(i64 [[R]]) -; CHECK-NEXT: call void @foo(i64 [[A]]) -; CHECK-NEXT: ret i64 [[R]] -; -entry: - br label %bb.next - -bb.next: - %r = add i64 %a, %b - call void @foo(i64 %r) - call void @foo(i64 %a) - br label %bb.next.next - -bb.next.next: - ret i64 %r -} - -define i64 @merge_into_predecessor_with_phi(i64 %a, i64 %b, i1 %c) { -; CHECK-LABEL: @merge_into_predecessor_with_phi( -; CHECK-NEXT: entry: -; CHECK-NEXT: call void @foo(i64 [[B:%.*]]) -; CHECK-NEXT: [[R:%.*]] = add i64 [[A:%.*]], [[B]] -; CHECK-NEXT: call void @foo(i64 [[R]]) -; CHECK-NEXT: call void @foo(i64 [[A]]) -; CHECK-NEXT: br i1 [[C:%.*]], 
label [[BB_NEXT_NEXT:%.*]], label [[BB_EXIT:%.*]] -; CHECK: bb.next.next: -; CHECK-NEXT: br label [[BB_EXIT]] -; CHECK: bb.exit: -; CHECK-NEXT: [[RET:%.*]] = phi i64 [ [[R]], [[ENTRY:%.*]] ], [ 10, [[BB_NEXT_NEXT]] ] -; CHECK-NEXT: ret i64 [[RET]] -; -entry: - call void @foo(i64 %b) - br label %bb.next - -bb.next: - %r = add i64 %a, %b - call void @foo(i64 %r) - call void @foo(i64 %a) - br i1 %c, label %bb.next.next, label %bb.exit - -bb.next.next: - br label %bb.exit - -bb.exit: - %ret = phi i64 [ %r, %bb.next], [ 10, %bb.next.next] - ret i64 %ret - -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg2-dead-block-order.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg2-dead-block-order.ll deleted file mode 100644 index 11b70fa526770..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg2-dead-block-order.ll +++ /dev/null @@ -1,109 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 -S < %s | FileCheck %s - -define i32 @remove_dead_blocks() { -; CHECK-LABEL: @remove_dead_blocks( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 1 -; CHECK-NEXT: } -; -entry: - ret i32 1 - -bb.1: - ret i32 2 - -bb.2: - ret i32 3 -} - -define i32 @simp1() { -; CHECK-LABEL: @simp1( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 1 -; CHECK: bb.1: -; CHECK-NEXT: ret i32 2 -; CHECK-NEXT: } -; -entry: - ret i32 1 - -bb.1: - ret i32 2 - -bb.2: - br i1 undef, label %bb.1, label %bb.3 - -bb.3: - ret i32 3 -} - -define i32 @remove_dead_block_with_phi() { -; CHECK-LABEL: @remove_dead_block_with_phi( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[BB_2:%.*]] -; CHECK: bb.2: -; CHECK-NEXT: ret i32 1 -; CHECK-NEXT: } -; -entry: - br label %bb.2 - -bb.1: - br label %bb.2 - -bb.2: - %rv = phi i32 [ 1, %entry ], [ 2, %bb.1 ] - ret i32 %rv -} - -define i32 @remove_dead_blocks_remaining_uses(i32 %a) { -; CHECK-LABEL: @remove_dead_blocks_remaining_uses( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 1 -; CHECK-NEXT: } -; -entry: - ret i32 1 - -bb.2: - ret i32 %res - -bb.1: - %res = add i32 %a, 10 - br label %bb.2 -} - -define i32 @remove_dead_blocks_remaining_uses2(i32 %a, i1 %cond) { -; CHECK-LABEL: @remove_dead_blocks_remaining_uses2( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 1 -; CHECK: bb.2: -; CHECK-NEXT: [[RES2:%.*]] = add i32 undef, 10 -; CHECK-NEXT: [[RES3:%.*]] = mul i32 [[RES2]], undef -; CHECK-NEXT: ret i32 [[RES3]] -; CHECK: bb.3: -; CHECK-NEXT: ret i32 undef -; CHECK-NEXT: } -; -entry: - ret i32 1 - -bb.2: - %res2 = add i32 %res, 10 - %res3 = mul i32 %res2, %res - ret i32 %res3 - -bb.3: - br label %bb.4 - -bb.4: - ret i32 %res - -bb.1: - %res = add i32 %a, 10 - br i1 %cond, label %bb.2, label %bb.3 - br label %bb.2 -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg3-phis.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg3-phis.ll deleted file mode 100644 index 76db503faeb25..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg3-phis.ll +++ /dev/null @@ -1,70 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg 
-tut-simplifycfg-version=v3 -S < %s | FileCheck %s - -define i32 @phi_cond_branch_eliminated() { -; CHECK-LABEL: @phi_cond_branch_eliminated( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 20 -; -entry: - br i1 true, label %bb.2, label %bb.3 - -bb.2: - br label %bb.3 - -bb.3: - %ret = phi i32 [ 10, %entry ], [ 20, %bb.2 ] - ret i32 %ret -} - -define i32 @phi_removed() { -; CHECK-LABEL: @phi_removed( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[BB_3:%.*]] -; CHECK: bb.3: -; CHECK-NEXT: ret i32 0 -; -entry: - br i1 false, label %bb.2, label %bb.3 - -bb.2: - %pv = phi i32 [ 10, %entry ] - br label %bb.3 - -bb.3: - ret i32 0 -} - -define i32 @phi_in_dead_region() { -; CHECK-LABEL: @phi_in_dead_region( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 1 -; -entry: - ret i32 1 - -bb.1: - br i1 true, label %bb.2, label %bb.3 - -bb.2: - br label %bb.3 - -bb.3: - %ret = phi i32 [ 10, %bb.1 ], [ 20, %bb.2 ] - ret i32 %ret -} - -define i32 @phi_in_mergable_blocks() { -; CHECK-LABEL: @phi_in_mergable_blocks( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 10 -; -entry: - br label %bb.1 - -bb.1: - %pv = phi i32 [ 10, %entry ] - ret i32 %pv -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg4-multiple-duplicate-cfg-updates.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg4-multiple-duplicate-cfg-updates.ll deleted file mode 100644 index 82a0e0dac2369..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg4-multiple-duplicate-cfg-updates.ll +++ /dev/null @@ -1,40 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 < %s -S -verify-dom-info | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 < %s -S -verify-dom-info | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 < %s -S -verify-dom-info | FileCheck %s - -; Check that we do not crash when we remove edges multiple times in -; the DomTreeUpdater. 
-define void @test() { -; CHECK-LABEL: @test( -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i8 undef, label [[IF_THEN_EPIL:%.*]] [ -; CHECK-NEXT: i8 32, label [[FOR_INC_EPIL:%.*]] -; CHECK-NEXT: i8 46, label [[FOR_INC_EPIL]] -; CHECK-NEXT: i8 95, label [[FOR_INC_EPIL]] -; CHECK-NEXT: i8 45, label [[FOR_INC_EPIL]] -; CHECK-NEXT: i8 126, label [[FOR_INC_EPIL]] -; CHECK-NEXT: ] -; CHECK: if.then.epil: -; CHECK-NEXT: unreachable -; CHECK: for.inc.epil: -; CHECK-NEXT: ret void -; -entry: - br label %for.body.epil - -for.body.epil: ; preds = %entry - switch i8 undef, label %if.then.epil [ - i8 32, label %for.inc.epil - i8 46, label %for.inc.epil - i8 95, label %for.inc.epil - i8 45, label %for.inc.epil - i8 126, label %for.inc.epil - ] - -if.then.epil: ; preds = %for.body.epil - unreachable - -for.inc.epil: ; preds = %for.body.epil, %for.body.epil, %for.body.epil, %for.body.epil, %for.body.epil - ret void -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg5-del-phis-for-dead-block.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg5-del-phis-for-dead-block.ll deleted file mode 100644 index b3edd1aa50584..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg5-del-phis-for-dead-block.ll +++ /dev/null @@ -1,122 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 < %s -S -verify-dom-info | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 < %s -S -verify-dom-info | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 < %s -S -verify-dom-info | FileCheck %s - -define void @test() { -; CHECK-LABEL: @test( -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 undef, label [[SW_DEFAULT23:%.*]] [ -; CHECK-NEXT: i32 129, label [[SW_BB:%.*]] -; CHECK-NEXT: i32 215, label [[SW_BB1:%.*]] -; CHECK-NEXT: i32 117, label [[SW_BB1]] -; CHECK-NEXT: i32 207, label [[SW_BB1]] -; CHECK-NEXT: i32 158, label [[SW_BB1]] -; CHECK-NEXT: i32 94, label [[SW_BB1]] -; CHECK-NEXT: i32 219, label [[SW_BB1]] -; CHECK-NEXT: i32 88, label [[SW_BB1]] -; CHECK-NEXT: i32 168, label [[SW_BB1]] -; CHECK-NEXT: i32 295, label [[SW_BB1]] -; CHECK-NEXT: i32 294, label [[SW_BB1]] -; CHECK-NEXT: i32 296, label [[SW_BB1]] -; CHECK-NEXT: i32 67, label [[SW_BB1]] -; CHECK-NEXT: i32 293, label [[SW_BB1]] -; CHECK-NEXT: i32 382, label [[SW_BB1]] -; CHECK-NEXT: i32 335, label [[SW_BB1]] -; CHECK-NEXT: i32 393, label [[SW_BB1]] -; CHECK-NEXT: i32 415, label [[SW_BB1]] -; CHECK-NEXT: i32 400, label [[SW_BB1]] -; CHECK-NEXT: i32 383, label [[SW_BB1]] -; CHECK-NEXT: i32 421, label [[SW_BB1]] -; CHECK-NEXT: i32 422, label [[SW_BB1]] -; CHECK-NEXT: i32 302, label [[SW_BB1]] -; CHECK-NEXT: i32 303, label [[SW_BB1]] -; CHECK-NEXT: i32 304, label [[SW_BB1]] -; CHECK-NEXT: i32 420, label [[SW_BB1]] -; CHECK-NEXT: i32 401, label [[SW_EPILOG24:%.*]] -; CHECK-NEXT: i32 53, label [[SW_BB12:%.*]] -; CHECK-NEXT: i32 44, label [[SW_BB12]] -; CHECK-NEXT: ] -; CHECK: sw.bb: -; CHECK-NEXT: unreachable -; CHECK: sw.bb1: -; CHECK-NEXT: br label [[SW_EPILOG24]] -; CHECK: sw.bb12: -; CHECK-NEXT: switch i32 undef, label [[SW_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 47, label [[SW_BB13:%.*]] -; CHECK-NEXT: i32 8, label [[SW_BB13]] -; CHECK-NEXT: ] -; CHECK: sw.bb13: -; CHECK-NEXT: unreachable -; CHECK: sw.default: -; CHECK-NEXT: unreachable -; CHECK: sw.default23: -; CHECK-NEXT: unreachable -; CHECK: sw.epilog24: -; CHECK-NEXT: [[PREVIOUS_3:%.*]] = phi i32 [ undef, [[SW_BB1]] ], [ 401, [[ENTRY:%.*]] 
] -; CHECK-NEXT: unreachable -; -entry: - br label %while.body - -while.body: ; preds = %entry - switch i32 undef, label %sw.default23 [ - i32 129, label %sw.bb - i32 215, label %sw.bb1 - i32 117, label %sw.bb1 - i32 207, label %sw.bb1 - i32 158, label %sw.bb1 - i32 94, label %sw.bb1 - i32 219, label %sw.bb1 - i32 88, label %sw.bb1 - i32 168, label %sw.bb1 - i32 295, label %sw.bb1 - i32 294, label %sw.bb1 - i32 296, label %sw.bb1 - i32 67, label %sw.bb1 - i32 293, label %sw.bb1 - i32 382, label %sw.bb1 - i32 335, label %sw.bb1 - i32 393, label %sw.bb1 - i32 415, label %sw.bb1 - i32 400, label %sw.bb1 - i32 383, label %sw.bb1 - i32 421, label %sw.bb1 - i32 422, label %sw.bb1 - i32 302, label %sw.bb1 - i32 303, label %sw.bb1 - i32 304, label %sw.bb1 - i32 420, label %sw.bb1 - i32 401, label %sw.epilog24 - i32 53, label %sw.bb12 - i32 44, label %sw.bb12 - ] - -sw.bb: ; preds = %while.body - unreachable - -sw.bb1: ; preds = %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body - br i1 false, label %land.lhs.true, label %sw.epilog24 - -land.lhs.true: ; preds = %sw.bb1 - br label %sw.epilog24 - -sw.bb12: ; preds = %while.body, %while.body - switch i32 undef, label %sw.default [ - i32 47, label %sw.bb13 - i32 8, label %sw.bb13 - ] - -sw.bb13: ; preds = %sw.bb12, %sw.bb12 - unreachable - -sw.default: ; preds = %sw.bb12 - unreachable - -sw.default23: ; preds = %while.body - unreachable - -sw.epilog24: ; preds = %land.lhs.true, %sw.bb1, %while.body - %Previous.3 = phi i32 [ undef, %land.lhs.true ], [ undef, %sw.bb1 ], [ 401, %while.body ] - unreachable -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg6-dead-self-loop.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg6-dead-self-loop.ll deleted file mode 100644 index f9705a6948b21..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg6-dead-self-loop.ll +++ /dev/null @@ -1,25 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 -S < %s | FileCheck %s - -define i32 @simp1() { -; CHECK-LABEL: @simp1( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 1 -; CHECK: bb.1: -; CHECK-NEXT: br label [[BB_1:%.*]] -; CHECK: bb.2: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[BB_2:%.*]] ] -; CHECK-NEXT: br label [[BB_2]] -; -entry: - ret i32 1 - -bb.1: - br label %bb.1 - -bb.2: - %p = phi i32 [ 0, %bb.2] - br label %bb.2 -} diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_weak_references.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_weak_references.s new file mode 100644 index 0000000000000..20fa5536302d7 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_weak_references.s @@ -0,0 +1,19 @@ +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t/macho_weak_refs.o %s +# RUN: llvm-jitlink -noexec -check-name=jitlink-check-bar-present -define-abs bar=0x1 -check=%s %t/macho_weak_refs.o +# RUN: llvm-jitlink -noexec -check-name=jitlink-check-bar-absent -check=%s %t/macho_weak_refs.o + +# Test weak reference handling by linking with and 
without a definition of 'bar' available.
+
+ .section __TEXT,__text,regular,pure_instructions
+ .build_version macos, 10, 14 sdk_version 10, 14
+ .globl _main
+ .p2align 4, 0x90
+_main:
+# jitlink-check-bar-present: *{8}(got_addr(macho_weak_refs.o, bar)) = bar
+# jitlink-check-bar-absent: *{8}(got_addr(macho_weak_refs.o, bar)) = 0
+ cmpq $0, bar@GOTPCREL(%rip)
+
+ .weak_reference bar
+
+.subsections_via_symbols
diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s
index c97b1ecce6d6d..5fabc6db1218b 100644
--- a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s
+++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s
@@ -40,6 +40,17 @@ test_gotld:
movq external_data@GOTPCREL(%rip), %rax
retq
+
+# Check X86_64_RELOC_GOTPCREL handling with cmp instructions, which have
+# negative addends.
+#
+# jitlink-check: decode_operand(test_gotcmpq, 3) = got_addr(macho_reloc.o, external_data) - next_pc(test_gotcmpq)
+ .globl test_gotcmpq
+ .align 4, 0x90
+test_gotcmpq:
+ cmpq $0, external_data@GOTPCREL(%rip)
+ retq
+
# Check that calls to external functions trigger the generation of stubs and GOT
# entries.
#
@@ -118,16 +129,16 @@ Lanon_data:
# anonymous.
#
# Note: +8 offset in expression below to account for sizeof(Lanon_data).
-# jitlink-check: *{8}(section_addr(macho_reloc.o, __data) + 8) = (section_addr(macho_reloc.o, __data) + 8) - named_data + 2
+# jitlink-check: *{8}(section_addr(macho_reloc.o, __data) + 8) = (section_addr(macho_reloc.o, __data) + 8) - named_data - 2
.p2align 3
Lanon_minuend_quad:
- .quad Lanon_minuend_quad - named_data + 2
+ .quad Lanon_minuend_quad - named_data - 2
# Note: +16 offset in expression below to account for sizeof(Lanon_data) + sizeof(Lanon_minuend_long).
-# jitlink-check: *{4}(section_addr(macho_reloc.o, __data) + 16) = ((section_addr(macho_reloc.o, __data) + 16) - named_data + 2)[31:0]
+# jitlink-check: *{4}(section_addr(macho_reloc.o, __data) + 16) = ((section_addr(macho_reloc.o, __data) + 16) - named_data - 2)[31:0]
.p2align 2
Lanon_minuend_long:
- .long Lanon_minuend_long - named_data + 2
+ .long Lanon_minuend_long - named_data - 2
# Named quad storage target (first named atom in __data).
.globl named_data
@@ -221,11 +232,11 @@ minuend_long3:
# (i.e. is part of an alt_entry chain that includes 'A').
#
# Check "A: .long B - C + D" where 'B' is an alt_entry for 'A'.
-# jitlink-check: *{4}subtractor_with_alt_entry_minuend_long = (subtractor_with_alt_entry_minuend_long_B - named_data + 2)[31:0]
+# jitlink-check: *{4}subtractor_with_alt_entry_minuend_long = (subtractor_with_alt_entry_minuend_long_B - named_data - 2)[31:0]
.globl subtractor_with_alt_entry_minuend_long
.p2align 2
subtractor_with_alt_entry_minuend_long:
- .long subtractor_with_alt_entry_minuend_long_B - named_data + 2
+ .long subtractor_with_alt_entry_minuend_long_B - named_data - 2
.globl subtractor_with_alt_entry_minuend_long_B
.p2align 2
@@ -234,11 +245,11 @@ subtractor_with_alt_entry_minuend_long_B:
.long 0
# Check "A: .quad B - C + D" where 'B' is an alt_entry for 'A'.
-# jitlink-check: *{8}subtractor_with_alt_entry_minuend_quad = (subtractor_with_alt_entry_minuend_quad_B - named_data + 2) +# jitlink-check: *{8}subtractor_with_alt_entry_minuend_quad = (subtractor_with_alt_entry_minuend_quad_B - named_data - 2) .globl subtractor_with_alt_entry_minuend_quad .p2align 3 subtractor_with_alt_entry_minuend_quad: - .quad subtractor_with_alt_entry_minuend_quad_B - named_data + 2 + .quad subtractor_with_alt_entry_minuend_quad_B - named_data - 2 .globl subtractor_with_alt_entry_minuend_quad_B .p2align 3 @@ -247,11 +258,11 @@ subtractor_with_alt_entry_minuend_quad_B: .quad 0 # Check "A: .long B - C + D" where 'C' is an alt_entry for 'A'. -# jitlink-check: *{4}subtractor_with_alt_entry_subtrahend_long = (named_data - subtractor_with_alt_entry_subtrahend_long_B + 2)[31:0] +# jitlink-check: *{4}subtractor_with_alt_entry_subtrahend_long = (named_data - subtractor_with_alt_entry_subtrahend_long_B - 2)[31:0] .globl subtractor_with_alt_entry_subtrahend_long .p2align 2 subtractor_with_alt_entry_subtrahend_long: - .long named_data - subtractor_with_alt_entry_subtrahend_long_B + 2 + .long named_data - subtractor_with_alt_entry_subtrahend_long_B - 2 .globl subtractor_with_alt_entry_subtrahend_long_B .p2align 2 @@ -260,11 +271,11 @@ subtractor_with_alt_entry_subtrahend_long_B: .long 0 # Check "A: .quad B - C + D" where 'B' is an alt_entry for 'A'. -# jitlink-check: *{8}subtractor_with_alt_entry_subtrahend_quad = (named_data - subtractor_with_alt_entry_subtrahend_quad_B + 2) +# jitlink-check: *{8}subtractor_with_alt_entry_subtrahend_quad = (named_data - subtractor_with_alt_entry_subtrahend_quad_B - 2) .globl subtractor_with_alt_entry_subtrahend_quad .p2align 3 subtractor_with_alt_entry_subtrahend_quad: - .quad named_data - subtractor_with_alt_entry_subtrahend_quad_B + 2 + .quad named_data - subtractor_with_alt_entry_subtrahend_quad_B - 2 .globl subtractor_with_alt_entry_subtrahend_quad_B .p2align 3 diff --git a/llvm/test/Feature/reserve_global_reg.ll b/llvm/test/Feature/reserve_global_reg.ll deleted file mode 100644 index 405f3eea00a5e..0000000000000 --- a/llvm/test/Feature/reserve_global_reg.ll +++ /dev/null @@ -1,30 +0,0 @@ -; REQUIRES: arm -; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s -; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s -; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s - -; int test(int a, int b, int c) { -; return a + b + c; -; } - -; Function Attrs: noinline nounwind optnone -define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 { -entry: - %a.addr = alloca i32, align 4 - %b.addr = alloca i32, align 4 - %c.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - store i32 %b, i32* %b.addr, align 4 - store i32 %c, i32* %c.addr, align 4 - %0 = load i32, i32* %a.addr, align 4 - %1 = load i32, i32* %b.addr, align 4 - %add = add nsw i32 %0, %1 - %2 = load i32, i32* %c.addr, align 4 - %add1 = add nsw i32 %add, %2 - ret i32 %add1 -} - -; CHECK-RESERVE-FP7: Register r7 has been specified but is used as the frame pointer for this target. -; CHECK-RESERVE-FP11: Register r11 has been specified but is used as the frame pointer for this target. -; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as the frame pointer for this target. 
- diff --git a/llvm/test/FileCheck/dump-input-enable.txt b/llvm/test/FileCheck/dump-input-enable.txt index 511248ea1ac7d..cf47f03dfa835 100644 --- a/llvm/test/FileCheck/dump-input-enable.txt +++ b/llvm/test/FileCheck/dump-input-enable.txt @@ -42,33 +42,32 @@ BADVAL: {{F|f}}ile{{C|c}}heck{{.*}}: for the --dump-input option: Cannot find op ; RUN: %ProtectFileCheckOutput FileCheck -dump-input=help \ ; RUN: | FileCheck %s -check-prefix=HELP -HELP-NOT: {{.}} -HELP: The following description was requested by -dump-input=help -HELP: try{{.*}}-color -HELP-NOT: {{.}} - ;-------------------------------------------------- ; Check -dump-input=never. ; ; Include the case without -v, which isn't covered elsewhere. ;-------------------------------------------------- +; FileCheck success, no -v => no dump, no trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -dump-input=never 2>&1 \ ; RUN: | FileCheck %s -match-full-lines -allow-empty \ ; RUN: -check-prefixes=NOTRACE,NODUMP +; FileCheck fail, no -v => no dump, no trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -dump-input=never 2>&1 \ ; RUN: | FileCheck %s -match-full-lines -check-prefixes=NOTRACE,ERR,NODUMP +; FileCheck success, -v => no dump, trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -dump-input=never -v 2>&1 \ ; RUN: | FileCheck %s -match-full-lines -check-prefixes=TRACE,NODUMP +; FileCheck fail, -v => no dump, trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -dump-input=never -v 2>&1 \ @@ -78,11 +77,13 @@ HELP-NOT: {{.}} ; Check no -dump-input, which defaults to never. ;-------------------------------------------------- +; FileCheck success, -v => no dump, trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -v 2>&1 \ ; RUN: | FileCheck %s -match-full-lines -check-prefixes=TRACE,NODUMP +; FileCheck fail, -v => no dump, trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -v 2>&1 \ @@ -94,23 +95,27 @@ HELP-NOT: {{.}} ; Include the case without -v, which isn't covered elsewhere. ;-------------------------------------------------- +; FileCheck success, no -v => no dump, no trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -dump-input=fail 2>&1 \ ; RUN: | FileCheck %s -match-full-lines -allow-empty \ ; RUN: -check-prefixes=NOTRACE,NODUMP +; FileCheck fail, no -v => dump, no trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -dump-input=fail 2>&1 \ ; RUN: | FileCheck %s -match-full-lines -check-prefixes=NOTRACE,ERR,DUMP-ERR +; FileCheck success, -v => no dump, no trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -dump-input=fail -v 2>&1 \ ; RUN: | FileCheck %s -match-full-lines -allow-empty \ ; RUN: -check-prefixes=NOTRACE,NODUMP +; FileCheck fail, -v => dump, no trace. 
; RUN: %ProtectFileCheckOutput \
; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \
; RUN: -match-full-lines -dump-input=fail -v 2>&1 \
@@ -121,24 +126,32 @@ HELP-NOT: {{.}}
; Check -dump-input-on-failure.
;--------------------------------------------------
+; Command-line option.
+
+; FileCheck success, -v => no dump, no trace.
; RUN: %ProtectFileCheckOutput \
; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \
; RUN: -match-full-lines -dump-input-on-failure -v 2>&1 \
; RUN: | FileCheck %s -match-full-lines -allow-empty \
; RUN: -check-prefixes=NOTRACE,NODUMP
+; FileCheck fail, -v => dump, no trace.
; RUN: %ProtectFileCheckOutput \
; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \
; RUN: -match-full-lines -dump-input-on-failure -v 2>&1 \
; RUN: | FileCheck %s -match-full-lines \
; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V
+; FILECHECK_DUMP_INPUT_ON_FAILURE=1.
+
+; FileCheck success, -v => no dump, no trace.
; RUN: %ProtectFileCheckOutput FILECHECK_DUMP_INPUT_ON_FAILURE=1 \
; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \
; RUN: -match-full-lines -v 2>&1 \
; RUN: | FileCheck %s -match-full-lines -allow-empty \
; RUN: -check-prefixes=NOTRACE,NODUMP
+; FileCheck fail, -v => dump, no trace.
; RUN: %ProtectFileCheckOutput FILECHECK_DUMP_INPUT_ON_FAILURE=1 \
; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \
; RUN: -match-full-lines -v 2>&1 \
@@ -149,23 +162,105 @@ HELP-NOT: {{.}}
; Check -dump-input=always.
;--------------------------------------------------
+; FileCheck success, -v => dump, no trace.
; RUN: %ProtectFileCheckOutput \
; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \
; RUN: -match-full-lines -dump-input=always -v 2>&1 \
; RUN: | FileCheck %s -match-full-lines -check-prefixes=NOTRACE,DUMP-OK
+; FileCheck fail, -v => dump, no trace.
; RUN: %ProtectFileCheckOutput \
; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \
; RUN: -match-full-lines -dump-input=always -v 2>&1 \
; RUN: | FileCheck %s -match-full-lines \
; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V
+;--------------------------------------------------
+; Check multiple -dump-input options.
+;
+; This occurs most commonly when a test author specifies -dump-input on a
+; specific FileCheck call while a test runner specifies -dump-input in
+; FILECHECK_OPTS, but check the behavior generally.
+;
+; "help" has precedence, and then the most verbose value wins. The most
+; common combinations involve "fail" and "always", so test those the most.
+;--------------------------------------------------
+
+;- - - - - - - - - - - - - - - - - - - - - - - - -
+; Check duplicate.
+;- - - - - - - - - - - - - - - - - - - - - - - - -
+
+; fail, fail => fail (FileCheck fail => dump)
+; RUN: %ProtectFileCheckOutput \
+; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \
+; RUN: -match-full-lines -dump-input=fail -dump-input=fail -v \
+; RUN: 2>&1 \
+; RUN: | FileCheck %s -match-full-lines \
+; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V
+
+;- - - - - - - - - - - - - - - - - - - - - - - - -
+; Check precedence.
+;- - - - - - - - - - - - - - - - - - - - - - - - - + +; help, always => help +; RUN: %ProtectFileCheckOutput \ +; RUN: FileCheck -input-file %t.err -color %t.check \ +; RUN: -dump-input=help -dump-input=always \ +; RUN: | FileCheck %s -check-prefix=HELP + +; always, fail => always (FileCheck success => dump) +; RUN: %ProtectFileCheckOutput \ +; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ +; RUN: -match-full-lines -dump-input=always -dump-input=fail \ +; RUN: -v 2>&1 \ +; RUN: | FileCheck %s -match-full-lines -check-prefixes=NOTRACE,DUMP-OK + +; fail, never => fail (FileCheck fail => dump) +; RUN: %ProtectFileCheckOutput \ +; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ +; RUN: -match-full-lines -dump-input=fail -dump-input=never -v \ +; RUN: 2>&1 \ +; RUN: | FileCheck %s -match-full-lines \ +; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V + +;- - - - - - - - - - - - - - - - - - - - - - - - - +; Check that order doesn't matter. +;- - - - - - - - - - - - - - - - - - - - - - - - - + +; fail, always => always (FileCheck success => dump) +; RUN: %ProtectFileCheckOutput \ +; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ +; RUN: -match-full-lines -dump-input=fail -dump-input=always \ +; RUN: -v 2>&1 \ +; RUN: | FileCheck %s -match-full-lines -check-prefixes=NOTRACE,DUMP-OK + +;- - - - - - - - - - - - - - - - - - - - - - - - - +; Check that FILECHECK_OPTS isn't handled differently. +;- - - - - - - - - - - - - - - - - - - - - - - - - + +; always, fail => always (FileCheck success => dump) +; RUN: %ProtectFileCheckOutput FILECHECK_OPTS=-dump-input=always \ +; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ +; RUN: -match-full-lines -dump-input=fail -v 2>&1 \ +; RUN: | FileCheck %s -match-full-lines -check-prefixes=NOTRACE,DUMP-OK + +; fail, always => always (FileCheck success => dump) +; RUN: %ProtectFileCheckOutput FILECHECK_OPTS=-dump-input=fail \ +; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ +; RUN: -match-full-lines -dump-input=always -v 2>&1 \ +; RUN: | FileCheck %s -match-full-lines -check-prefixes=NOTRACE,DUMP-OK + ; END. ;-------------------------------------------------- -; Check the output for all cases that actually process directives. +; Check the output. ;-------------------------------------------------- +; HELP-NOT: {{.}} +; HELP: The following description was requested by -dump-input=help +; HELP: try{{.*}}-color +; HELP-NOT: {{.}} + ; Trace is sometimes suppressed. ; TRACE: {{.*}}remark:{{.*}} ; NOTRACE-NOT: remark: diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll new file mode 100644 index 0000000000000..ba148e8d6e7a6 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll @@ -0,0 +1,75 @@ +; Checks that asan prologue does not add debug locations, which would +; fool findPrologueEndLoc because it sets the end of the prologue to the +; first instruction. Breaking on the instrumented function in a debugger +; would then stop at that instruction, before the prologue is finished. 
+ +; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; 1: void f(int *arg) { +; 2: } +; 3: int main(int argc, char **argv) { +; 4: f(&argc); +; 5: } +; clang 1.cc -g -S -emit-llvm -o - | sed 's/#0 = {/#0 = { sanitize_address/' + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local i32 @main(i32 %argc, i8** %argv) #0 !dbg !15 { +entry: +; No suffix like !dbg !123 +; CHECK: %asan_local_stack_base = alloca i64{{$}} +; CHECK: %3 = call i64 @__asan_stack_malloc_0(i64 64){{$}} + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + store i32 %argc, i32* %argc.addr, align 4 + call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !21, metadata !DIExpression()), !dbg !22 + store i8** %argv, i8*** %argv.addr, align 8 + call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !23, metadata !DIExpression()), !dbg !24 + call void @f(i32* %argc.addr), !dbg !25 + ret i32 0, !dbg !26 +} + +define dso_local void @f(i32* %arg) #0 !dbg !7 { +entry: + %arg.addr = alloca i32*, align 8 + store i32* %arg, i32** %arg.addr, align 8 + call void @llvm.dbg.declare(metadata i32** %arg.addr, metadata !12, metadata !DIExpression()), !dbg !13 + ret void, !dbg !14 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { sanitize_address noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 (git@github.com:llvm/llvm-project 1ac700cdef787383ad49a0e37d9894491ef19480)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "2.c", directory: "/home/builduser") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 10.0.0 (git@github.com:llvm/llvm-project 1ac700cdef787383ad49a0e37d9894491ef19480)"} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !DILocalVariable(name: "arg", arg: 1, scope: !7, file: !1, line: 1, type: !10) +!13 = !DILocation(line: 1, column: 13, scope: !7) +!14 = !DILocation(line: 2, column: 1, scope: !7) +!15 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !16, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!16 = !DISubroutineType(types: !17) +!17 = !{!11, !11, !18} +!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) +!19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, 
size: 64) +!20 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!21 = !DILocalVariable(name: "argc", arg: 1, scope: !15, file: !1, line: 3, type: !11) +!22 = !DILocation(line: 3, column: 14, scope: !15) +!23 = !DILocalVariable(name: "argv", arg: 2, scope: !15, file: !1, line: 3, type: !18) +!24 = !DILocation(line: 3, column: 27, scope: !15) +!25 = !DILocation(line: 4, column: 3, scope: !15) +!26 = !DILocation(line: 5, column: 1, scope: !15) diff --git a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll index ad3a274c8272c..67e13e56414fd 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll @@ -18,8 +18,8 @@ entry: ; CHECK: %asan_local_stack_base = alloca i64 ; CHECK: %[[ALLOCA:.*]] = ptrtoint i8* %MyAlloca to i64 ; CHECK: %[[PHI:.*]] = phi i64 {{.*}} %[[ALLOCA]], - ; CHECK: store i64 %[[PHI]], i64* %asan_local_stack_base, !dbg - ; CHECK: call void @llvm.dbg.declare(metadata i64* %asan_local_stack_base, metadata !13, metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 32)), !dbg !14 + ; CHECK: store i64 %[[PHI]], i64* %asan_local_stack_base + ; CHECK: call void @llvm.dbg.declare(metadata i64* %asan_local_stack_base, metadata !12, metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 32)), !dbg !13 %0 = load i32, i32* %i.addr, align 4, !dbg !14 %add = add nsw i32 %0, 2, !dbg !15 ret i32 %add, !dbg !16 diff --git a/llvm/test/MC/AArch64/armv8.2a-crypto-apple.s b/llvm/test/MC/AArch64/armv8.2a-crypto-apple.s new file mode 100644 index 0000000000000..1b9153136d057 --- /dev/null +++ b/llvm/test/MC/AArch64/armv8.2a-crypto-apple.s @@ -0,0 +1,41 @@ +// RUN: llvm-mc -output-asm-variant=1 -triple aarch64-apple-ios -mattr=+sha3,+sm4 -show-encoding < %s | FileCheck %s + + sha512h.2d q0, q1, v2 + sha512h2.2d q0, q1, v2 + sha512su0.2d v11, v12 + sha512su1.2d v11, v13, v14 + eor3.16b v25, v12, v7, v2 + rax1.2d v30, v29, v26 + xar.2d v26, v21, v27, #63 + bcax.16b v31, v26, v2, v1 + +//CHECK: sha512h.2d q0, q1, v2 ; encoding: [0x20,0x80,0x62,0xce] +//CHECK: sha512h2.2d q0, q1, v2 ; encoding: [0x20,0x84,0x62,0xce] +//CHECK: sha512su0.2d v11, v12 ; encoding: [0x8b,0x81,0xc0,0xce] +//CHECK: sha512su1.2d v11, v13, v14 ; encoding: [0xab,0x89,0x6e,0xce] +//CHECK: eor3.16b v25, v12, v7, v2 ; encoding: [0x99,0x09,0x07,0xce] +//CHECK: rax1.2d v30, v29, v26 ; encoding: [0xbe,0x8f,0x7a,0xce] +//CHECK: xar.2d v26, v21, v27, #63 ; encoding: [0xba,0xfe,0x9b,0xce] +//CHECK: bcax.16b v31, v26, v2, v1 ; encoding: [0x5f,0x07,0x22,0xce] + + + + sm3ss1.4s v20, v23, v21, v22 + sm3tt1a.4s v20, v23, v21[3] + sm3tt1b.4s v20, v23, v21[3] + sm3tt2a.4s v20, v23, v21[3] + sm3tt2b.4s v20, v23, v21[3] + sm3partw1.4s v30, v29, v26 + sm3partw2.4s v30, v29, v26 + sm4ekey.4s v11, v11, v19 + sm4e.4s v2, v15 + +// CHECK: sm3ss1.4s v20, v23, v21, v22 ; encoding: [0xf4,0x5a,0x55,0xce] +// CHECK: sm3tt1a.4s v20, v23, v21[3] ; encoding: [0xf4,0xb2,0x55,0xce] +// CHECK: sm3tt1b.4s v20, v23, v21[3] ; encoding: [0xf4,0xb6,0x55,0xce] +// CHECK: sm3tt2a.4s v20, v23, v21[3] ; encoding: [0xf4,0xba,0x55,0xce] +// CHECK: sm3tt2b.4s v20, v23, v21[3] ; encoding: [0xf4,0xbe,0x55,0xce] +// CHECK: sm3partw1.4s v30, v29, v26 ; encoding: [0xbe,0xc3,0x7a,0xce] +// CHECK: sm3partw2.4s v30, v29, v26 ; encoding: [0xbe,0xc7,0x7a,0xce] +// CHECK: sm4ekey.4s v11, v11, v19 ; encoding: [0x6b,0xc9,0x73,0xce] +// CHECK: sm4e.4s v2, v15 ; encoding: [0xe2,0x85,0xc0,0xce] diff --git 
a/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s b/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s index 2ca15fceccc8f..056a3ae86c07f 100644 --- a/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s +++ b/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s @@ -307,10 +307,10 @@ // CHECK-REQ: error: instruction requires: pa // CHECK-REQ-NEXT: ldrab x0, [x1] ldraa x0, [x1]! -// CHECK-NEXT: ldraa x0, [x1]! // encoding: [0x20,0x0c,0x20,0xf8] +// CHECK-NEXT: ldraa x0, [x1, #0]! // encoding: [0x20,0x0c,0x20,0xf8] // CHECK-REQ: error: instruction requires: pa // CHECK-REQ-NEXT: ldraa x0, [x1]! ldrab x0, [x1]! -// CHECK-NEXT: ldrab x0, [x1]! // encoding: [0x20,0x0c,0xa0,0xf8] +// CHECK-NEXT: ldrab x0, [x1, #0]! // encoding: [0x20,0x0c,0xa0,0xf8] // CHECK-REQ: error: instruction requires: pa // CHECK-REQ-NEXT: ldrab x0, [x1]! diff --git a/llvm/test/MC/COFF/cfi-sections.s b/llvm/test/MC/COFF/cfi-sections.s new file mode 100644 index 0000000000000..00a8d746c194d --- /dev/null +++ b/llvm/test/MC/COFF/cfi-sections.s @@ -0,0 +1,26 @@ +// RUN: llvm-mc -filetype=obj -triple x86_64-mingw32 %s -o - | llvm-objdump -r - | FileCheck --check-prefix=COFF_X86_64 %s +// RUN: llvm-mc -filetype=obj -triple i686-mingw32 %s -o - | llvm-objdump -r - | FileCheck --check-prefix=COFF_I686 %s + +.cfi_sections .debug_frame + +f1: + .cfi_startproc + nop + .cfi_endproc + +f2: + .cfi_startproc + nop + .cfi_endproc + +// COFF_X86_64: RELOCATION RECORDS FOR [.debug_frame]: +// COFF_X86_64-NEXT: {{.*}} IMAGE_REL_AMD64_SECREL .debug_frame +// COFF_X86_64-NEXT: {{.*}} IMAGE_REL_AMD64_ADDR64 .text +// COFF_X86_64-NEXT: {{.*}} IMAGE_REL_AMD64_SECREL .debug_frame +// COFF_X86_64-NEXT: {{.*}} IMAGE_REL_AMD64_ADDR64 .text + +// COFF_I686: RELOCATION RECORDS FOR [.debug_frame]: +// COFF_I686-NEXT: {{.*}} IMAGE_REL_I386_SECREL .debug_frame +// COFF_I686-NEXT: {{.*}} IMAGE_REL_I386_DIR32 .text +// COFF_I686-NEXT: {{.*}} IMAGE_REL_I386_SECREL .debug_frame +// COFF_I686-NEXT: {{.*}} IMAGE_REL_I386_DIR32 .text diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.3a-signed-pointer.txt b/llvm/test/MC/Disassembler/AArch64/armv8.3a-signed-pointer.txt index d11056044fa48..7215d086c693c 100644 --- a/llvm/test/MC/Disassembler/AArch64/armv8.3a-signed-pointer.txt +++ b/llvm/test/MC/Disassembler/AArch64/armv8.3a-signed-pointer.txt @@ -114,7 +114,7 @@ [0x20,0x04,0x20,0xf8] [0x20,0x04,0xa0,0xf8] -# CHECK: ldraa x0, [x1]! -# CHECK: ldrab x0, [x1]! +# CHECK: ldraa x0, [x1, #0]! +# CHECK: ldrab x0, [x1, #0]! 
[0x20,0x0c,0x20,0xf8] [0x20,0x0c,0xa0,0xf8] diff --git a/llvm/test/MC/MachO/reloc.s b/llvm/test/MC/MachO/reloc.s index 1379d80eb310e..bab5d63d27f45 100644 --- a/llvm/test/MC/MachO/reloc.s +++ b/llvm/test/MC/MachO/reloc.s @@ -37,7 +37,7 @@ L0: .text _f0: L1: - jmp 0xbabecafe + jmp 0x7abecafe jmp L0 jmp L1 ret diff --git a/llvm/test/MC/Mips/ll-expansion.s b/llvm/test/MC/Mips/ll-expansion.s new file mode 100644 index 0000000000000..4653a33d7e787 --- /dev/null +++ b/llvm/test/MC/Mips/ll-expansion.s @@ -0,0 +1,406 @@ +# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips2 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 +# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 +# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32r2 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 +# RUN: llvm-mc -filetype=obj -triple mipsn32 -mcpu=mips3 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSN32 +# RUN: llvm-mc -filetype=obj -triple mipsn32 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSN32R6 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r2 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 +# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32R6 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64R6 + +ll $2, 128($sp) +# MIPS32: c3 a2 00 80 ll $2, 128($sp) +# MIPS32R6: 7f a2 40 36 ll $2, 128($sp) +# MIPSN32: c3 a2 00 80 ll $2, 128($sp) +# MIPSN32R6: 7f a2 40 36 ll $2, 128($sp) +# MIPS64: c3 a2 00 80 ll $2, 128($sp) +# MIPS64R6: 7f a2 40 36 ll $2, 128($sp) + +ll $2, -128($sp) +# MIPS32: c3 a2 ff 80 ll $2, -128($sp) +# MIPS32R6: 7f a2 c0 36 ll $2, -128($sp) +# MIPSN32: c3 a2 ff 80 ll $2, -128($sp) +# MIPSN32R6: 7f a2 c0 36 ll $2, -128($sp) +# MIPS64: c3 a2 ff 80 ll $2, -128($sp) +# MIPS64R6: 7f a2 c0 36 ll $2, -128($sp) + +ll $2, 256($sp) +# MIPS32: c3 a2 01 00 ll $2, 256($sp) + +# MIPS32R6: 27 a2 01 00 addiu $2, $sp, 256 +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: c3 a2 01 00 ll $2, 256($sp) + +# MIPSN32R6: 27 a2 01 00 addiu $2, $sp, 256 +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: c3 a2 01 00 ll $2, 256($sp) + +# MIPS64R6: 67 a2 01 00 daddiu $2, $sp, 256 +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, -257($sp) +# MIPS32: c3 a2 fe ff ll $2, -257($sp) + +# MIPS32R6: 27 a2 fe ff addiu $2, $sp, -257 +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: c3 a2 fe ff ll $2, -257($sp) + +# MIPSN32R6: 27 a2 fe ff addiu $2, $sp, -257 +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: c3 a2 fe ff ll $2, -257($sp) + +# MIPS64R6: 67 a2 fe ff daddiu $2, $sp, -257 +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, 32767($sp) +# MIPS32: c3 a2 7f ff ll $2, 32767($sp) + +# MIPS32R6: 27 a2 7f ff addiu $2, $sp, 32767 +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: c3 a2 7f ff ll $2, 32767($sp) + +# MIPSN32R6: 27 a2 7f ff addiu $2, $sp, 32767 +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: c3 a2 7f ff ll $2, 32767($sp) + +# MIPS64R6: 67 a2 7f ff daddiu $2, $sp, 32767 +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, 
32768($sp) +# MIPS32: 3c 02 00 01 lui $2, 1 +# MIPS32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32-NEXT: c0 42 80 00 ll $2, -32768($2) + +# MIPS32R6: 34 02 80 00 ori $2, $zero, 32768 +# MIPS32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: 3c 02 00 01 lui $2, 1 +# MIPSN32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32-NEXT: c0 42 80 00 ll $2, -32768($2) + +# MIPSN32R6: 34 02 80 00 ori $2, $zero, 32768 +# MIPSN32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: 3c 02 00 01 lui $2, 1 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: c0 42 80 00 ll $2, -32768($2) + +# MIPS64R6: 34 02 80 00 ori $2, $zero, 32768 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, -32768($sp) +# MIPS32: c3 a2 80 00 ll $2, -32768($sp) + +# MIPS32R6: 27 a2 80 00 addiu $2, $sp, -32768 +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: c3 a2 80 00 ll $2, -32768($sp) + +# MIPSN32R6: 27 a2 80 00 addiu $2, $sp, -32768 +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: c3 a2 80 00 ll $2, -32768($sp) + +# MIPS64R6: 67 a2 80 00 daddiu $2, $sp, -32768 +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, -32769($sp) +# MIPS32: 3c 02 ff ff lui $2, 65535 +# MIPS32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32-NEXT: c0 42 7f ff ll $2, 32767($2) + +# MIPS32R6: 3c 02 ff ff aui $2, $zero, 65535 +# MIPS32R6-NEXT: 34 42 7f ff ori $2, $2, 32767 +# MIPS32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: 3c 02 ff ff lui $2, 65535 +# MIPSN32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32-NEXT: c0 42 7f ff ll $2, 32767($2) + +# MIPSN32R6: 3c 02 ff ff aui $2, $zero, 65535 +# MIPSN32R6-NEXT: 34 42 7f ff ori $2, $2, 32767 +# MIPSN32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: 3c 02 ff ff lui $2, 65535 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: c0 42 7f ff ll $2, 32767($2) + +# MIPS64R6: 3c 02 ff ff aui $2, $zero, 65535 +# MIPS64R6-NEXT: 34 42 7f ff ori $2, $2, 32767 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, 655987($sp) +# MIPS32: 3c 02 00 0a lui $2, 10 +# MIPS32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32-NEXT: c0 42 02 73 ll $2, 627($2) + +# MIPS32R6: 3c 02 00 0a aui $2, $zero, 10 +# MIPS32R6-NEXT: 34 42 02 73 ori $2, $2, 627 +# MIPS32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: 3c 02 00 0a lui $2, 10 +# MIPSN32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32-NEXT: c0 42 02 73 ll $2, 627($2) + +# MIPSN32R6: 3c 02 00 0a aui $2, $zero, 10 +# MIPSN32R6-NEXT: 34 42 02 73 ori $2, $2, 627 +# MIPSN32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: 3c 02 00 0a lui $2, 10 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: c0 42 02 73 ll $2, 627($2) + +# MIPS64R6: 3c 02 00 0a aui $2, $zero, 10 +# MIPS64R6-NEXT: 34 42 02 73 ori $2, $2, 627 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, -655987($sp) +# MIPS32: 3c 02 ff f6 lui $2, 65526 +# MIPS32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32-NEXT: c0 42 fd 8d ll $2, -627($2) + +# MIPS32R6: 3c 02 ff f5 aui $2, $zero, 65525 +# MIPS32R6-NEXT: 34 42 fd 8d ori $2, $2, 64909 +# MIPS32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: 3c 02 ff f6 lui $2, 65526 +# MIPSN32-NEXT: 00 
5d 10 21 addu $2, $2, $sp +# MIPSN32-NEXT: c0 42 fd 8d ll $2, -627($2) + +# MIPSN32R6: 3c 02 ff f5 aui $2, $zero, 65525 +# MIPSN32R6-NEXT: 34 42 fd 8d ori $2, $2, 64909 +# MIPSN32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: 3c 02 ff f6 lui $2, 65526 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: c0 42 fd 8d ll $2, -627($2) + +# MIPS64R6: 3c 02 ff f5 aui $2, $zero, 65525 +# MIPS64R6-NEXT: 34 42 fd 8d ori $2, $2, 64909 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $12, symbol +# MIPS32: 3c 0c 00 00 lui $12, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPS32-NEXT: R_MIPS_LO16 symbol + +# MIPS32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 25 8c 00 00 addiu $12, $12, 0 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPSN32: 3c 0c 00 00 lui $12, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol +# MIPSN32-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPSN32-NEXT: R_MIPS_LO16 symbol + +# MIPSN32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol +# MIPSN32R6-NEXT: 25 8c 00 00 addiu $12, $12, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol +# MIPSN32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +ll $12, symbol($3) +# MIPS32: 3c 0c 00 00 lui $12, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: 01 83 60 21 addu $12, $12, $3 +# MIPS32-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPS32-NEXT: R_MIPS_LO16 symbol + +# MIPS32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 25 8c 00 00 addiu $12, $12, 0 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 01 83 60 21 addu $12, $12, $3 +# MIPS32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPSN32: 3c 0c 00 00 lui $12, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol +# MIPSN32-NEXT: 01 83 60 21 addu $12, $12, $3 +# MIPSN32-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPSN32-NEXT: R_MIPS_LO16 symbol + +# MIPSN32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol +# MIPSN32R6-NEXT: 25 8c 00 00 addiu $12, $12, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol +# MIPSN32R6-NEXT: 01 83 60 21 addu $12, $12, $3 +# MIPSN32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE 
symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 01 83 60 2d daddu $12, $12, $3 +# MIPS64-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 01 83 60 2d daddu $12, $12, $3 +# MIPS64R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +ll $12, symbol+8 +# MIPS32: 3c 0c 00 00 lui $12, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: c1 8c 00 08 ll $12, 8($12) +# MIPS32-NEXT: R_MIPS_LO16 symbol + +# MIPS32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 25 8c 00 08 addiu $12, $12, 8 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPSN32: 3c 0c 00 00 lui $12, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol+0x8 +# MIPSN32-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPSN32-NEXT: R_MIPS_LO16 symbol+0x8 + +# MIPSN32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol+0x8 +# MIPSN32R6-NEXT: 25 8c 00 00 addiu $12, $12, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol+0x8 +# MIPSN32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 + +# MIPS64R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +.option pic2 + +ll $12, symbol +# MIPS32: 8f 8c 00 00 lw $12, 0($gp) +# MIPS32-NEXT: R_MIPS_GOT16 symbol +# MIPS32-NEXT: c1 8c 00 00 ll $12, 0($12) + +# MIPS32R6: 8f 8c 00 00 lw $12, 0($gp) +# MIPS32R6-NEXT: R_MIPS_GOT16 symbol +# MIPS32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPSN32: 8f 8c 00 00 lw $12, 0($gp) +# MIPSN32-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32-NEXT: c1 8c 00 00 ll $12, 0($12) + +# MIPSN32R6: 8f 8c 00 00 lw $12, 0($gp) +# MIPSN32R6-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPS64: df 8c 00 00 ld $12, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: c1 8c 00 00 ll $12, 0($12) + +# 
MIPS64R6: df 8c 00 00 ld $12, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +ll $12, symbol+8 +# MIPS32: 8f 8c 00 00 lw $12, 0($gp) +# MIPS32-NEXT: R_MIPS_GOT16 symbol +# MIPS32-NEXT: c1 8c 00 08 ll $12, 8($12) + +# MIPS32R6: 8f 8c 00 00 lw $12, 0($gp) +# MIPS32R6-NEXT: R_MIPS_GOT16 symbol +# MIPS32R6-NEXT: 25 8c 00 08 addiu $12, $12, 8 +# MIPS32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPSN32: 8f 8c 00 00 lw $12, 0($gp) +# MIPSN32-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32-NEXT: c1 8c 00 08 ll $12, 8($12) + +# MIPSN32R6: 8f 8c 00 00 lw $12, 0($gp) +# MIPSN32R6-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32R6-NEXT: 25 8c 00 08 addiu $12, $12, 8 +# MIPSN32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPS64: df 8c 00 00 ld $12, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: c1 8c 00 08 ll $12, 8($12) + +# MIPS64R6: df 8c 00 00 ld $12, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 08 daddiu $12, $12, 8 +# MIPS64R6-NEXT: 7d 8c 00 36 ll $12, 0($12) diff --git a/llvm/test/MC/Mips/lld-expansion.s b/llvm/test/MC/Mips/lld-expansion.s new file mode 100644 index 0000000000000..48755d59a2400 --- /dev/null +++ b/llvm/test/MC/Mips/lld-expansion.s @@ -0,0 +1,188 @@ +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64R6 + +lld $2, 128($sp) +# MIPS64: d3 a2 00 80 lld $2, 128($sp) +# MIPS64R6: 7f a2 40 37 lld $2, 128($sp) + +lld $2, -128($sp) +# MIPS64: d3 a2 ff 80 lld $2, -128($sp) +# MIPS64R6: 7f a2 c0 37 lld $2, -128($sp) + +lld $2, 256($sp) +# MIPS64: d3 a2 01 00 lld $2, 256($sp) + +# MIPS64R6: 67 a2 01 00 daddiu $2, $sp, 256 +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, -257($sp) +# MIPS64: d3 a2 fe ff lld $2, -257($sp) + +# MIPS64R6: 67 a2 fe ff daddiu $2, $sp, -257 +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, 32767($sp) +# MIPS64: d3 a2 7f ff lld $2, 32767($sp) + +# MIPS64R6: 67 a2 7f ff daddiu $2, $sp, 32767 +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, 32768($sp) +# MIPS64: 3c 02 00 01 lui $2, 1 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: d0 42 80 00 lld $2, -32768($2) + +# MIPS64R6: 34 02 80 00 ori $2, $zero, 32768 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, -32768($sp) +# MIPS64: d3 a2 80 00 lld $2, -32768($sp) + +# MIPS64R6: 67 a2 80 00 daddiu $2, $sp, -32768 +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, -32769($sp) +# MIPS64: 3c 02 ff ff lui $2, 65535 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: d0 42 7f ff lld $2, 32767($2) + +# MIPS64R6: 3c 02 ff ff aui $2, $zero, 65535 +# MIPS64R6-NEXT: 34 42 7f ff ori $2, $2, 32767 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, 2147483648($sp) +# MIPS64: 34 02 80 00 ori $2, $zero, 32768 +# MIPS64-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: d0 42 00 00 lld $2, 0($2) + +# MIPS64R6: 34 02 80 00 ori $2, $zero, 32768 +# MIPS64R6-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, -2147483648($sp) +# MIPS64: 3c 02 80 00 lui $2, 32768 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp 
+# MIPS64-NEXT: d0 42 00 00 lld $2, 0($2) + +# MIPS64R6: 3c 02 80 00 aui $2, $zero, 32768 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, 9223372036853775808($sp) +# MIPS64: 3c 02 7f ff lui $2, 32767 +# MIPS64-NEXT: 34 42 ff ff ori $2, $2, 65535 +# MIPS64-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64-NEXT: 34 42 ff f1 ori $2, $2, 65521 +# MIPS64-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: d0 42 bd c0 lld $2, -16960($2) + +# MIPS64R6: 3c 02 7f ff aui $2, $zero, 32767 +# MIPS64R6-NEXT: 34 42 ff ff ori $2, $2, 65535 +# MIPS64R6-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64R6-NEXT: 34 42 ff f0 ori $2, $2, 65520 +# MIPS64R6-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64R6-NEXT: 34 42 bd c0 ori $2, $2, 48576 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $12, symbol +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: d1 8c 00 00 lld $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 7d 8c 00 37 lld $12, 0($12) + +lld $12, symbol($3) +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 01 83 60 2d daddu $12, $12, $3 +# MIPS64-NEXT: d1 8c 00 00 lld $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6-NEXT: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 01 83 60 2d daddu $12, $12, $3 +# MIPS64R6-NEXT: 7d 8c 00 37 lld $12, 0($12) + +lld $12, symbol+8 +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# 
MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: d1 8c 00 00 lld $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 + +# MIPS64R6-NEXT: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 7d 8c 00 37 lld $12, 0($12) + +.option pic2 + +lld $12, symbol +# MIPS64: df 8c 00 00 ld $12, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: d1 8c 00 00 lld $12, 0($12) + +# MIPS64R6: df 8c 00 00 ld $12, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 7d 8c 00 37 lld $12, 0($12) + +lld $12, symbol+8 +# MIPS64: df 8c 00 00 ld $12, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: d1 8c 00 08 lld $12, 8($12) + +# MIPS64R6: df 8c 00 00 ld $12, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 08 daddiu $12, $12, 8 +# MIPS64R6-NEXT: 7d 8c 00 37 lld $12, 0($12) diff --git a/llvm/test/MC/Mips/sc-expansion.s b/llvm/test/MC/Mips/sc-expansion.s index 76b30f174f9e2..b407f7aaf5700 100644 --- a/llvm/test/MC/Mips/sc-expansion.s +++ b/llvm/test/MC/Mips/sc-expansion.s @@ -1,48 +1,406 @@ # RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips2 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 # RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 # RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32r2 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS -# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips3 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS -# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips64 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS -# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips64r2 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 +# RUN: llvm-mc -filetype=obj -triple mipsn32 -mcpu=mips3 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSN32 +# RUN: llvm-mc -filetype=obj -triple mipsn32 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSN32R6 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r2 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 # RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32r6 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSR6 -# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips64r6 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSR6 
- -# MIPS: e0 6c 00 00 sc $12, 0($3) -# MIPSR6: 7c 6c 00 26 sc $12, 0($3) -sc $12, 0($3) - -# MIPS: e0 6c 00 04 sc $12, 4($3) -# MIPSR6: 7c 6c 02 26 sc $12, 4($3) -sc $12, 4($3) - -# MIPS: 3c 01 00 00 lui $1, 0 -# MIPS: R_MIPS_HI16 symbol -# MIPS: e0 2c 00 00 sc $12, 0($1) -# MIPS: R_MIPS_LO16 symbol - -# MIPSR6: 3c 01 00 00 aui $1, $zero, 0 -# MIPSR6: R_MIPS_HI16 symbol -# MIPSR6: 24 21 00 00 addiu $1, $1, 0 -# MIPSR6: R_MIPS_LO16 symbol -# MIPSR6: 7c 2c 00 26 sc $12, 0($1) +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32R6 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64R6 + +sc $2, 128($sp) +# MIPS32: e3 a2 00 80 sc $2, 128($sp) +# MIPS32R6: 7f a2 40 26 sc $2, 128($sp) +# MIPSN32: e3 a2 00 80 sc $2, 128($sp) +# MIPSN32R6: 7f a2 40 26 sc $2, 128($sp) +# MIPS64: e3 a2 00 80 sc $2, 128($sp) +# MIPS64R6: 7f a2 40 26 sc $2, 128($sp) + +sc $2, -128($sp) +# MIPS32: e3 a2 ff 80 sc $2, -128($sp) +# MIPS32R6: 7f a2 c0 26 sc $2, -128($sp) +# MIPSN32: e3 a2 ff 80 sc $2, -128($sp) +# MIPSN32R6: 7f a2 c0 26 sc $2, -128($sp) +# MIPS64: e3 a2 ff 80 sc $2, -128($sp) +# MIPS64R6: 7f a2 c0 26 sc $2, -128($sp) + +sc $2, 256($sp) +# MIPS32: e3 a2 01 00 sc $2, 256($sp) + +# MIPS32R6: 27 a1 01 00 addiu $1, $sp, 256 +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: e3 a2 01 00 sc $2, 256($sp) + +# MIPSN32R6: 27 a1 01 00 addiu $1, $sp, 256 +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: e3 a2 01 00 sc $2, 256($sp) + +# MIPS64R6: 67 a1 01 00 daddiu $1, $sp, 256 +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, -257($sp) +# MIPS32: e3 a2 fe ff sc $2, -257($sp) + +# MIPS32R6: 27 a1 fe ff addiu $1, $sp, -257 +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: e3 a2 fe ff sc $2, -257($sp) + +# MIPSN32R6: 27 a1 fe ff addiu $1, $sp, -257 +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: e3 a2 fe ff sc $2, -257($sp) + +# MIPS64R6: 67 a1 fe ff daddiu $1, $sp, -257 +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, 32767($sp) +# MIPS32: e3 a2 7f ff sc $2, 32767($sp) + +# MIPS32R6: 27 a1 7f ff addiu $1, $sp, 32767 +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: e3 a2 7f ff sc $2, 32767($sp) + +# MIPSN32R6: 27 a1 7f ff addiu $1, $sp, 32767 +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: e3 a2 7f ff sc $2, 32767($sp) + +# MIPS64R6: 67 a1 7f ff daddiu $1, $sp, 32767 +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, 32768($sp) +# MIPS32: 3c 01 00 01 lui $1, 1 +# MIPS32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32-NEXT: e0 22 80 00 sc $2, -32768($1) + +# MIPS32R6: 34 01 80 00 ori $1, $zero, 32768 +# MIPS32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: 3c 01 00 01 lui $1, 1 +# MIPSN32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32-NEXT: e0 22 80 00 sc $2, -32768($1) + +# MIPSN32R6: 34 01 80 00 ori $1, $zero, 32768 +# MIPSN32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: 3c 01 00 01 lui $1, 1 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: e0 22 80 00 sc $2, -32768($1) + +# MIPS64R6: 34 01 80 00 ori $1, $zero, 32768 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, -32768($sp) +# MIPS32: e3 a2 80 00 sc $2, -32768($sp) + +# MIPS32R6: 27 a1 80 00 addiu $1, $sp, -32768 +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: e3 a2 80 00 sc $2, -32768($sp) + +# MIPSN32R6: 27 a1 80 00 addiu $1, $sp, -32768 +# 
MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: e3 a2 80 00 sc $2, -32768($sp) + +# MIPS64R6: 67 a1 80 00 daddiu $1, $sp, -32768 +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, -32769($sp) +# MIPS32: 3c 01 ff ff lui $1, 65535 +# MIPS32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32-NEXT: e0 22 7f ff sc $2, 32767($1) + +# MIPS32R6: 3c 01 ff ff aui $1, $zero, 65535 +# MIPS32R6-NEXT: 34 21 7f ff ori $1, $1, 32767 +# MIPS32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: 3c 01 ff ff lui $1, 65535 +# MIPSN32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32-NEXT: e0 22 7f ff sc $2, 32767($1) + +# MIPSN32R6: 3c 01 ff ff aui $1, $zero, 65535 +# MIPSN32R6-NEXT: 34 21 7f ff ori $1, $1, 32767 +# MIPSN32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: 3c 01 ff ff lui $1, 65535 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: e0 22 7f ff sc $2, 32767($1) + +# MIPS64R6: 3c 01 ff ff aui $1, $zero, 65535 +# MIPS64R6-NEXT: 34 21 7f ff ori $1, $1, 32767 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, 655987($sp) +# MIPS32: 3c 01 00 0a lui $1, 10 +# MIPS32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32-NEXT: e0 22 02 73 sc $2, 627($1) + +# MIPS32R6: 3c 01 00 0a aui $1, $zero, 10 +# MIPS32R6-NEXT: 34 21 02 73 ori $1, $1, 627 +# MIPS32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: 3c 01 00 0a lui $1, 10 +# MIPSN32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32-NEXT: e0 22 02 73 sc $2, 627($1) + +# MIPSN32R6: 3c 01 00 0a aui $1, $zero, 10 +# MIPSN32R6-NEXT: 34 21 02 73 ori $1, $1, 627 +# MIPSN32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: 3c 01 00 0a lui $1, 10 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: e0 22 02 73 sc $2, 627($1) + +# MIPS64R6: 3c 01 00 0a aui $1, $zero, 10 +# MIPS64R6-NEXT: 34 21 02 73 ori $1, $1, 627 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, -655987($sp) +# MIPS32: 3c 01 ff f6 lui $1, 65526 +# MIPS32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32-NEXT: e0 22 fd 8d sc $2, -627($1) + +# MIPS32R6: 3c 01 ff f5 aui $1, $zero, 65525 +# MIPS32R6-NEXT: 34 21 fd 8d ori $1, $1, 64909 +# MIPS32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: 3c 01 ff f6 lui $1, 65526 +# MIPSN32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32-NEXT: e0 22 fd 8d sc $2, -627($1) + +# MIPSN32R6: 3c 01 ff f5 aui $1, $zero, 65525 +# MIPSN32R6-NEXT: 34 21 fd 8d ori $1, $1, 64909 +# MIPSN32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: 3c 01 ff f6 lui $1, 65526 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: e0 22 fd 8d sc $2, -627($1) + +# MIPS64R6: 3c 01 ff f5 aui $1, $zero, 65525 +# MIPS64R6-NEXT: 34 21 fd 8d ori $1, $1, 64909 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $12, symbol +# MIPS32: 3c 01 00 00 lui $1, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPS32-NEXT: R_MIPS_LO16 symbol + +# MIPS32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 24 21 00 00 addiu $1, $1, 0 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPSN32: 3c 01 00 00 lui $1, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol +# MIPSN32-NEXT: e0 2c 00 00 sc $12, 0($1) 
+# MIPSN32-NEXT: R_MIPS_LO16 symbol + +# MIPSN32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol +# MIPSN32R6-NEXT: 24 21 00 00 addiu $1, $1, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol +# MIPSN32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +sc $12, symbol($3) +# MIPS32: 3c 01 00 00 lui $1, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: 00 23 08 21 addu $1, $1, $3 +# MIPS32-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPS32-NEXT: R_MIPS_LO16 symbol + +# MIPS32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 24 21 00 00 addiu $1, $1, 0 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 00 23 08 21 addu $1, $1, $3 +# MIPS32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPSN32: 3c 01 00 00 lui $1, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol +# MIPSN32-NEXT: 00 23 08 21 addu $1, $1, $3 +# MIPSN32-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPSN32-NEXT: R_MIPS_LO16 symbol + +# MIPSN32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol +# MIPSN32R6-NEXT: 24 21 00 00 addiu $1, $1, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol +# MIPSN32R6-NEXT: 00 23 08 21 addu $1, $1, $3 +# MIPSN32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 00 23 08 2d daddu $1, $1, $3 +# MIPS64-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 23 08 2d daddu $1, $1, $3 +# MIPS64R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +sc $12, symbol+8 +# MIPS32: 3c 01 00 00 lui $1, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: e0 2c 00 08 sc $12, 8($1) +# MIPS32-NEXT: R_MIPS_LO16 
symbol + +# MIPS32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 24 21 00 08 addiu $1, $1, 8 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPSN32: 3c 01 00 00 lui $1, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol+0x8 +# MIPSN32-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPSN32-NEXT: R_MIPS_LO16 symbol+0x8 + +# MIPSN32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol+0x8 +# MIPSN32R6-NEXT: 24 21 00 00 addiu $1, $1, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol+0x8 +# MIPSN32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +.option pic2 + sc $12, symbol +# MIPS32: 8f 81 00 00 lw $1, 0($gp) +# MIPS32-NEXT: R_MIPS_GOT16 symbol +# MIPS32-NEXT: e0 2c 00 00 sc $12, 0($1) + +# MIPS32R6: 8f 81 00 00 lw $1, 0($gp) +# MIPS32R6-NEXT: R_MIPS_GOT16 symbol +# MIPS32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPSN32: 8f 81 00 00 lw $1, 0($gp) +# MIPSN32-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32-NEXT: e0 2c 00 00 sc $12, 0($1) + +# MIPSN32R6: 8f 81 00 00 lw $1, 0($gp) +# MIPSN32R6-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPS64: df 81 00 00 ld $1, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: e0 2c 00 00 sc $12, 0($1) + +# MIPS64R6: df 81 00 00 ld $1, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +sc $12, symbol+8 +# MIPS32: 8f 81 00 00 lw $1, 0($gp) +# MIPS32-NEXT: R_MIPS_GOT16 symbol +# MIPS32-NEXT: e0 2c 00 08 sc $12, 8($1) + +# MIPS32R6: 8f 81 00 00 lw $1, 0($gp) +# MIPS32R6-NEXT: R_MIPS_GOT16 symbol +# MIPS32R6-NEXT: 24 21 00 08 addiu $1, $1, 8 +# MIPS32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPSN32: 8f 81 00 00 lw $1, 0($gp) +# MIPSN32-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32-NEXT: e0 2c 00 08 sc $12, 8($1) + +# MIPSN32R6: 8f 81 00 00 lw $1, 0($gp) +# MIPSN32R6-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32R6-NEXT: 24 21 00 08 addiu $1, $1, 8 +# MIPSN32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPS64: df 81 00 00 ld $1, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: e0 2c 00 08 sc $12, 8($1) -# MIPS: 3c 01 00 00 lui $1, 0 -# MIPS: R_MIPS_HI16 symbol -# MIPS: e0 2c 00 08 sc $12, 8($1) -# MIPS: R_MIPS_LO16 symbol - -# MIPSR6: 3c 01 00 00 aui $1, $zero, 0 -# MIPSR6: R_MIPS_HI16 symbol -# MIPSR6: 24 21 00 08 addiu $1, $1, 8 -# MIPSR6: R_MIPS_LO16 symbol -# MIPSR6: 7c 2c 00 26 sc $12, 
0($1) -sc $12, symbol + 8 +# MIPS64R6: df 81 00 00 ld $1, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 08 daddiu $1, $1, 8 +# MIPS64R6-NEXT: 7c 2c 00 26 sc $12, 0($1) diff --git a/llvm/test/MC/Mips/scd-expansion.s b/llvm/test/MC/Mips/scd-expansion.s new file mode 100644 index 0000000000000..54a3baa5d68fe --- /dev/null +++ b/llvm/test/MC/Mips/scd-expansion.s @@ -0,0 +1,188 @@ +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64R6 + +scd $2, 128($sp) +# MIPS64: f3 a2 00 80 scd $2, 128($sp) +# MIPS64R6: 7f a2 40 27 scd $2, 128($sp) + +scd $2, -128($sp) +# MIPS64: f3 a2 ff 80 scd $2, -128($sp) +# MIPS64R6: 7f a2 c0 27 scd $2, -128($sp) + +scd $2, 256($sp) +# MIPS64: f3 a2 01 00 scd $2, 256($sp) + +# MIPS64R6: 67 a1 01 00 daddiu $1, $sp, 256 +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, -257($sp) +# MIPS64: f3 a2 fe ff scd $2, -257($sp) + +# MIPS64R6: 67 a1 fe ff daddiu $1, $sp, -257 +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, 32767($sp) +# MIPS64: f3 a2 7f ff scd $2, 32767($sp) + +# MIPS64R6: 67 a1 7f ff daddiu $1, $sp, 32767 +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, 32768($sp) +# MIPS64: 3c 01 00 01 lui $1, 1 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: f0 22 80 00 scd $2, -32768($1) + +# MIPS64R6: 34 01 80 00 ori $1, $zero, 32768 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, -32768($sp) +# MIPS64: f3 a2 80 00 scd $2, -32768($sp) + +# MIPS64R6: 67 a1 80 00 daddiu $1, $sp, -32768 +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, -32769($sp) +# MIPS64: 3c 01 ff ff lui $1, 65535 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: f0 22 7f ff scd $2, 32767($1) + +# MIPS64R6: 3c 01 ff ff aui $1, $zero, 65535 +# MIPS64R6-NEXT: 34 21 7f ff ori $1, $1, 32767 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, 2147483648($sp) +# MIPS64: 34 01 80 00 ori $1, $zero, 32768 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: f0 22 00 00 scd $2, 0($1) + +# MIPS64R6: 34 01 80 00 ori $1, $zero, 32768 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, -2147483648($sp) +# MIPS64: 3c 01 80 00 lui $1, 32768 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: f0 22 00 00 scd $2, 0($1) + +# MIPS64R6: 3c 01 80 00 aui $1, $zero, 32768 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, 9223372036853775808($sp) +# MIPS64: 3c 01 7f ff lui $1, 32767 +# MIPS64-NEXT: 34 21 ff ff ori $1, $1, 65535 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 34 21 ff f1 ori $1, $1, 65521 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: f0 22 bd c0 scd $2, -16960($1) + +# MIPS64R6: 3c 01 7f ff aui $1, $zero, 32767 +# MIPS64R6-NEXT: 34 21 ff ff ori $1, $1, 65535 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 34 21 ff f0 ori $1, $1, 65520 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 34 21 bd c0 ori $1, $1, 48576 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 27 
scd $2, 0($1) + +scd $12, symbol +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: f0 2c 00 00 scd $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 7c 2c 00 27 scd $12, 0($1) + +scd $12, symbol($3) +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 00 23 08 2d daddu $1, $1, $3 +# MIPS64-NEXT: f0 2c 00 00 scd $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 23 08 2d daddu $1, $1, $3 +# MIPS64R6-NEXT: 7c 2c 00 27 scd $12, 0($1) + +scd $12, symbol+8 +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: f0 2c 00 00 scd $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 7c 2c 00 27 scd $12, 0($1) + +.option pic2 + +scd $12, symbol +# MIPS64: df 81 00 00 ld $1, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: f0 
2c 00 00 scd $12, 0($1)
+
+# MIPS64R6: df 81 00 00 ld $1, 0($gp)
+# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol
+# MIPS64R6-NEXT: 7c 2c 00 27 scd $12, 0($1)
+
+scd $12, symbol+8
+# MIPS64: df 81 00 00 ld $1, 0($gp)
+# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol
+# MIPS64-NEXT: f0 2c 00 08 scd $12, 8($1)
+
+# MIPS64R6: df 81 00 00 ld $1, 0($gp)
+# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol
+# MIPS64R6-NEXT: 64 21 00 08 daddiu $1, $1, 8
+# MIPS64R6-NEXT: 7c 2c 00 27 scd $12, 0($1)
diff --git a/llvm/test/MC/X86/x86-jcxz-loop-fixup.s b/llvm/test/MC/X86/x86-jcxz-loop-fixup.s
new file mode 100644
index 0000000000000..219c1bb52eb6b
--- /dev/null
+++ b/llvm/test/MC/X86/x86-jcxz-loop-fixup.s
@@ -0,0 +1,26 @@
+# RUN: not llvm-mc -filetype=obj -triple=x86_64-linux-gnu %s 2>&1 | FileCheck %s
+
+ .balign 128
+label00:
+// CHECK: value of 253 is too large for field of 1 byte.
+ jecxz label01
+// CHECK: value of 251 is too large for field of 1 byte.
+ jrcxz label01
+// CHECK: value of 249 is too large for field of 1 byte.
+ loop label01
+// CHECK: value of 247 is too large for field of 1 byte.
+ loope label01
+// CHECK: value of 245 is too large for field of 1 byte.
+ loopne label01
+ .balign 256
+label01:
+// CHECK: value of -259 is too large for field of 1 byte.
+ jecxz label00
+// CHECK: value of -261 is too large for field of 1 byte.
+ jrcxz label00
+// CHECK: value of -263 is too large for field of 1 byte.
+ loop label00
+// CHECK: value of -265 is too large for field of 1 byte.
+ loope label00
+// CHECK: value of -267 is too large for field of 1 byte.
+ loopne label00
diff --git a/llvm/test/MachineVerifier/verify-regops.mir b/llvm/test/MachineVerifier/verify-regops.mir
new file mode 100644
index 0000000000000..9219586ffc03b
--- /dev/null
+++ b/llvm/test/MachineVerifier/verify-regops.mir
@@ -0,0 +1,37 @@
+# RUN: not llc -march=x86 -o - %s -run-pass=none -verify-machineinstrs \
+# RUN: 2>&1 | FileCheck %s
+# REQUIRES: x86-registered-target
+#
+# Check that MachineVerifier catches corrupt operands where MO->isReg()
+# returns true, but the descriptor says it should be an OPERAND_IMMEDIATE or
+# OPERAND_PCREL. Conversely, where both MO->isReg() and MO->isFI() return
+# false, check that an operand whose descriptor expects an OPERAND_REGISTER
+# is reported.
+
+# CHECK-LABEL: fun
+
+# CHECK: *** Bad machine code: Expected a register operand. ***
+# CHECK: - instruction: %1:gr32 = XOR32rm -1, %fixed-stack.1, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %fixed-stack.1, align 8)
+# CHECK: - operand 1: -1
+
+# CHECK: *** Bad machine code: Expected a non-register operand. ***
+# CHECK: - instruction: %2:gr32 = OR32ri %1:gr32(tied-def 0), %0:gr32, implicit-def dead $eflags
+# CHECK: - operand 2: %0:gr32
+
+
+name: fun
+tracksRegLiveness: true
+fixedStack:
+ - { id: 1, offset: 8, size: 4, alignment: 8, isImmutable: true }
+ - { id: 3, size: 4, alignment: 16, isImmutable: true }
+body: |
+ bb.0:
+ %0:gr32 = MOV32rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.3, align 16)
+ ; Was: %1:gr32 = XOR32rm %0, %fixed-stack.1, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %fixed-stack.1, align 8)
+ %1:gr32 = XOR32rm -1, %fixed-stack.1, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %fixed-stack.1, align 8)
+ ; Was: %2:gr32 = OR32ri %1, -256, implicit-def dead $eflags
+ %2:gr32 = OR32ri %1, %0, implicit-def dead $eflags
+ %3:gr32 = MOV32ri -1
+ $eax = COPY %2
+ $edx = COPY %3
+ RET 0, $eax, $edx
+...
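
A rough sketch of the verifier logic the two cases above exercise (simplified
and assumed for illustration; the exact code in LLVM's MachineVerifier.cpp may
differ): each MachineOperand is compared against the MCOperandInfo from its
instruction's MCInstrDesc, along these lines in C++:

    // MO is the operand being visited, MONum its index, MCID the MCInstrDesc.
    const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
    switch (MCOI.OperandType) {
    case MCOI::OPERAND_REGISTER:
      if (!MO->isReg() && !MO->isFI())
        report("Expected a register operand.", MO, MONum);
      break;
    case MCOI::OPERAND_IMMEDIATE:
    case MCOI::OPERAND_PCREL:
      if (MO->isReg())
        report("Expected a non-register operand.", MO, MONum);
      break;
    }

Note that the register arm also admits frame indices (MO->isFI()), which is
why the XOR32rm case only fires once its register operand is replaced by the
immediate -1.
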
diff --git a/llvm/test/Object/invalid.test b/llvm/test/Object/invalid.test index 254b8f43e7bcd..37563652bd630 100644 --- a/llvm/test/Object/invalid.test +++ b/llvm/test/Object/invalid.test @@ -41,7 +41,7 @@ Sections: # RUN: not llvm-objdump -s %p/Inputs/invalid-strtab-size.elf 2>&1 \ # RUN: | FileCheck %s -DFILE=%p/Inputs/invalid-strtab-size.elf --check-prefix=INVALID-STRTAB-SIZE -# INVALID-STRTAB-SIZE: error: '[[FILE]]': section [index 1] has a sh_offset (0x70) + sh_size (0xffffff) that cannot be represented +# INVALID-STRTAB-SIZE: error: '[[FILE]]': section [index 1] has a sh_offset (0x70) + sh_size (0xffffff) that is greater than the file size (0x218) ## Check that llvm-dwarfdump reports an error during relocation resolution ## when instead of expected SHT_RELA section it locates a section of a different type. @@ -252,7 +252,7 @@ Symbols: [] # RUN: not llvm-readobj -r %t12 2>&1 | FileCheck -DFILE=%t12 --check-prefix=INVALID-RELOC-SH-OFFSET %s # RUN: not llvm-readobj -r %t13 2>&1 | FileCheck -DFILE=%t13 --check-prefix=INVALID-RELOC-SH-OFFSET %s -# INVALID-RELOC-SH-OFFSET: error: '[[FILE]]': section [index 1] has a sh_offset (0x10000) + sh_size (0x0) that cannot be represented +# INVALID-RELOC-SH-OFFSET: error: '[[FILE]]': section [index 1] has a sh_offset (0x10000) + sh_size (0x0) that is greater than the file size (0x160) --- !ELF FileHeader: @@ -286,7 +286,7 @@ Sections: --- !ELF FileHeader: - Class: ELFCLASS64 + Class: ELFCLASS32 Data: ELFDATA2LSB Type: ET_REL Machine: EM_386 @@ -375,7 +375,7 @@ Sections: # RUN: not llvm-readobj --sections --section-data %t18 2>&1 \ # RUN: | FileCheck -DFILE=%t18 --check-prefix=BROKEN-SECSHOFFSET %s -# BROKEN-SECSHOFFSET: error: '[[FILE]]': section [index 1] has a sh_offset (0xffff0000) + sh_size (0x0) that cannot be represented +# BROKEN-SECSHOFFSET: error: '[[FILE]]': section [index 1] has a sh_offset (0xffff0000) + sh_size (0x0) that is greater than the file size (0x160) --- !ELF FileHeader: diff --git a/llvm/test/Other/2010-05-06-Printer.ll b/llvm/test/Other/2010-05-06-Printer.ll index 9e7c9cb6ab4a8..decd977c3d212 100644 --- a/llvm/test/Other/2010-05-06-Printer.ll +++ b/llvm/test/Other/2010-05-06-Printer.ll @@ -16,6 +16,5 @@ define void @foo(){ ;ALL: ModuleID = ;FOO: IR Dump After -;FOO-EMPTY: ;FOO-NEXT: define void @foo() ;FOO-NOT: define void @tester diff --git a/llvm/test/Other/printer.ll b/llvm/test/Other/printer.ll index 9785a17b2280a..8633765628550 100644 --- a/llvm/test/Other/printer.ll +++ b/llvm/test/Other/printer.ll @@ -1,5 +1,7 @@ -; RUN: opt -mem2reg -instcombine -print-after-all -disable-output < %s 2>&1 | FileCheck %s -; RUN: opt -passes='mem2reg,instcombine' -print-after-all -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -mem2reg -instcombine -print-after-all -disable-output < %s 2>&1 | \ +; RUN: FileCheck --check-prefixes=CHECK,OLDPM %s --implicit-check-not='IR Dump' +; RUN: opt -passes='mem2reg,instcombine' -print-after-all -disable-output < %s 2>&1 | \ +; RUN: FileCheck --check-prefixes=CHECK,NEWPM %s --implicit-check-not='IR Dump' define void @tester(){ ret void } @@ -8,21 +10,14 @@ define void @foo(){ ret void } -;CHECK-NOT: IR Dump After PassManager -;CHECK-NOT: IR Dump After ModuleToFunctionPassAdaptor -; -;CHECK: *** IR Dump After {{Promote Memory to Register|PromotePass}} -;CHECK: define void @tester -;CHECK-NOT: define void @foo -;CHECK: *** IR Dump After {{Combine redundant instructions|InstCombinePass}} -;CHECK: define void @tester -;CHECK-NOT: define void @foo -;CHECK: *** IR Dump After {{Promote Memory to 
Register|PromotePass}} -;CHECK: define void @foo -;CHECK-NOT: define void @tester -;CHECK: *** IR Dump After {{Combine redundant instructions|InstCombinePass}} -;CHECK: define void @foo -;CHECK-NOT: define void @tester -;CHECK: *** IR Dump After {{Module Verifier|VerifierPass}} -; -;CHECK-NOT: IR Dump After Print Module IR +; NEWPM: *** IR Dump After VerifierPass +; CHECK: *** IR Dump After {{Promote Memory to Register|PromotePass}} +; CHECK-NEXT: define void @tester +; CHECK: *** IR Dump After {{Combine redundant instructions|InstCombinePass}} +; CHECK-NEXT: define void @tester +; OLDPM: *** IR Dump After Module Verifier +; CHECK: *** IR Dump After {{Promote Memory to Register|PromotePass}} +; CHECK-NEXT: define void @foo +; CHECK: *** IR Dump After {{Combine redundant instructions|InstCombinePass}} +; CHECK-NEXT: define void @foo +; CHECK: *** IR Dump After {{Module Verifier|VerifierPass}} diff --git a/llvm/test/ThinLTO/X86/diagnostic-handler-remarks.ll b/llvm/test/ThinLTO/X86/diagnostic-handler-remarks.ll index b83e93b7a29de..a996b8586eef8 100644 --- a/llvm/test/ThinLTO/X86/diagnostic-handler-remarks.ll +++ b/llvm/test/ThinLTO/X86/diagnostic-handler-remarks.ll @@ -51,6 +51,19 @@ ; YAML2-NEXT: - String: ')' ; YAML2-NEXT: ... +; The file extension depends on the format of the remarks +; RUN: rm -f %t.bitstream.thin.0.bitstream %t.bitstream.thin.1.bitstream +; RUN: llvm-lto -thinlto-action=run \ +; RUN: -lto-pass-remarks-output=%t.bitstream \ +; RUN: -lto-pass-remarks-filter=inline \ +; RUN: -lto-pass-remarks-format=bitstream \ +; RUN: -exported-symbol _func2 \ +; RUN: -exported-symbol _main %t1.bc %t2.bc 2>&1 | \ +; RUN: FileCheck %s -allow-empty +; RUN: llvm-bcanalyzer %t.bitstream.thin.0.bitstream +; RUN: llvm-bcanalyzer %t.bitstream.thin.1.bitstream +; CHECK-NOT: remark: +; CHECK-NOT: llvm-lto: target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/llvm/test/ThinLTO/X86/index-const-prop2.ll b/llvm/test/ThinLTO/X86/index-const-prop2.ll index 430c7e8156d2a..928d00adc9a23 100644 --- a/llvm/test/ThinLTO/X86/index-const-prop2.ll +++ b/llvm/test/ThinLTO/X86/index-const-prop2.ll @@ -36,6 +36,8 @@ ; RUN: -o %t4 ; RUN: llvm-dis %t4.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT2 +; Run again but with main2 exported instead of main to check that write-only +; variables are optimized out. ; RUN: llvm-lto2 run %t1.bc %t2.bc -save-temps \ ; RUN: -r=%t2.bc,foo,pl \ ; RUN: -r=%t2.bc,bar,pl \ @@ -49,7 +51,7 @@ ; RUN: -r=%t1.bc,baz, \ ; RUN: -r=%t1.bc,gBar, \ ; RUN: -o %t5 -; RUN: llvm-dis %t5.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-dis %t5.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT-WRITEONLY ; RUN: llvm-dis %t5.1.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN2 ; Check that gFoo and gBar were eliminated from source module together ; with corresponding stores @@ -59,6 +61,10 @@ ; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4 ; IMPORT: !DICompileUnit({{.*}}) +; Write-only variables are imported with a zero initializer.
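+; (Illustrative sketch added during editing, not part of the original test: a global is write-only +; when its only remaining uses across the program are stores, e.g. +;   @gFoo = global i32 1 +;   define void @onlyWrites() { store i32 7, i32* @gFoo  ret void } +; where @onlyWrites is a hypothetical user. Since the stored values can never be read, the stores +; are dropped and the initializer is imported as zero, as the checks below expect.)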
+; IMPORT-WRITEONLY: @gFoo.llvm.0 = internal unnamed_addr global i32 0 +; IMPORT-WRITEONLY: @gBar = internal local_unnamed_addr global i32 0 + ; CODEGEN: i32 @main() ; CODEGEN-NEXT: ret i32 3 diff --git a/llvm/test/ThinLTO/X86/writeonly-with-refs.ll b/llvm/test/ThinLTO/X86/writeonly-with-refs.ll index 63f75762c39b1..787d977582211 100644 --- a/llvm/test/ThinLTO/X86/writeonly-with-refs.ll +++ b/llvm/test/ThinLTO/X86/writeonly-with-refs.ll @@ -7,10 +7,22 @@ ; RUN: -r=%t2,outer,pl ; @outer should have been internalized and converted to zeroinitializer. -; RUN: llvm-dis %t-out.1.5.precodegen.bc -o - | FileCheck %s -; RUN: llvm-dis %t-out.2.5.precodegen.bc -o - | FileCheck %s +; RUN: llvm-dis %t-out.1.3.import.bc -o - | FileCheck %s +; RUN: llvm-dis %t-out.2.3.import.bc -o - | FileCheck %s -; CHECK: @outer = internal unnamed_addr global %struct.Q zeroinitializer +; CHECK: @outer = internal local_unnamed_addr global %struct.Q zeroinitializer + +; Test again in distributed ThinLTO mode. +; RUN: llvm-lto2 run -save-temps %t1 %t2 -o %t-out \ +; RUN: -thinlto-distributed-indexes \ +; RUN: -r=%t1,main,plx \ +; RUN: -r=%t1,_Z3foov,l \ +; RUN: -r=%t2,_Z3foov,pl \ +; RUN: -r=%t2,outer,pl +; RUN: opt -function-import -import-all-index -enable-import-metadata -summary-file %t1.thinlto.bc %t1 -o %t1.out +; RUN: opt -function-import -import-all-index -summary-file %t2.thinlto.bc %t2 -o %t2.out +; RUN: llvm-dis %t1.out -o - | FileCheck %s +; RUN: llvm-dis %t2.out -o - | FileCheck %s source_filename = "main.cpp" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/ThinLTO/X86/writeonly.ll b/llvm/test/ThinLTO/X86/writeonly.ll index 20f4533efe686..27305e160ea71 100644 --- a/llvm/test/ThinLTO/X86/writeonly.ll +++ b/llvm/test/ThinLTO/X86/writeonly.ll @@ -11,8 +11,8 @@ ; RUN: llvm-dis %t1.imported.bc -o - | FileCheck %s --check-prefix=IMPORT ; RUN: llvm-lto -thinlto-action=optimize %t1.imported.bc -o - | llvm-dis - -o - | FileCheck %s --check-prefix=OPTIMIZE -; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4, !dbg !0 -; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4, !dbg !5 +; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 0, align 4, !dbg !0 +; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 0, align 4, !dbg !5 ; IMPORT: !DICompileUnit({{.*}}) ; STATS: 2 module-summary-index - Number of live global variables marked write only diff --git a/llvm/test/ThinLTO/X86/writeonly2.ll b/llvm/test/ThinLTO/X86/writeonly2.ll index a7383f25b4822..2648727f09971 100644 --- a/llvm/test/ThinLTO/X86/writeonly2.ll +++ b/llvm/test/ThinLTO/X86/writeonly2.ll @@ -19,8 +19,8 @@ ; with corresponding stores ; RUN: llvm-dis %t3.2.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN-SRC -; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4 -; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4 +; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 0, align 4 +; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 0, align 4 ; IMPORT: !DICompileUnit({{.*}}) ; CODEGEN-NOT: gFoo diff --git a/llvm/test/Transforms/FunctionAttrs/align.ll b/llvm/test/Transforms/Attributor/align.ll similarity index 85% rename from llvm/test/Transforms/FunctionAttrs/align.ll rename to llvm/test/Transforms/Attributor/align.ll index da7bd1b5cc9a8..a5bf91915baf8 100644 --- a/llvm/test/Transforms/FunctionAttrs/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -337,5 +337,64 @@ define i64 
@test11(i32* %p) { %ret = load i64, i64* %p-cast, align 8 ret i64 %ret } + +; TEST 12 +; Test for deduction using must-be-executed-context and GEP instruction + +; FIXME: %p should have nonnull +; ATTRIBUTOR: define i64 @test12-1(i32* nocapture nofree readonly align 16 %p) +define i64 @test12-1(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = getelementptr i64, i64* %p-cast, i64 1 + %arrayidx1 = getelementptr i64, i64* %arrayidx0, i64 3 + %ret = load i64, i64* %arrayidx1, align 16 + ret i64 %ret +} + +; ATTRIBUTOR: define i64 @test12-2(i32* nocapture nofree nonnull readonly align 16 dereferenceable(8) %p) +define i64 @test12-2(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = getelementptr i64, i64* %p-cast, i64 0 + %ret = load i64, i64* %arrayidx0, align 16 + ret i64 %ret +} + +; FIXME: %p should have nonnull +; ATTRIBUTOR: define void @test12-3(i32* nocapture nofree writeonly align 16 %p) +define void @test12-3(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = getelementptr i64, i64* %p-cast, i64 1 + %arrayidx1 = getelementptr i64, i64* %arrayidx0, i64 3 + store i64 0, i64* %arrayidx1, align 16 + ret void +} + +; ATTRIBUTOR: define void @test12-4(i32* nocapture nofree nonnull writeonly align 16 dereferenceable(8) %p) +define void @test12-4(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = getelementptr i64, i64* %p-cast, i64 0 + store i64 0, i64* %arrayidx0, align 16 + ret void +} + +declare void @use(i64*) willreturn nounwind + +; ATTRIBUTOR: define void @test12-5(i32* align 16 %p) +define void @test12-5(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = getelementptr i64, i64* %p-cast, i64 1 + %arrayidx1 = getelementptr i64, i64* %arrayidx0, i64 3 + tail call void @use(i64* align 16 %arrayidx1) + ret void +} + +; ATTRIBUTOR: define void @test12-6(i32* align 16 %p) +define void @test12-6(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = getelementptr i64, i64* %p-cast, i64 0 + tail call void @use(i64* align 16 %arrayidx0) + ret void +} + attributes #0 = { nounwind uwtable noinline } attributes #1 = { uwtable noinline } diff --git a/llvm/test/Transforms/FunctionAttrs/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/callbacks.ll rename to llvm/test/Transforms/Attributor/callbacks.ll diff --git a/llvm/test/Transforms/FunctionAttrs/dereferenceable.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll similarity index 96% rename from llvm/test/Transforms/FunctionAttrs/dereferenceable.ll rename to llvm/test/Transforms/Attributor/dereferenceable-1.ll index 1c285fa288370..951b5047747f0 100644 --- a/llvm/test/Transforms/FunctionAttrs/dereferenceable.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -30,8 +30,7 @@ define i32* @test3_1(i32* dereferenceable(8) %0) local_unnamed_addr { } define i32* @test3_2(i32* dereferenceable_or_null(32) %0) local_unnamed_addr { -; FIXME: We should not have both deref(x) and deref_or_null(y) with x >= y. 
-; ATTRIBUTOR: define nonnull dereferenceable(16) i32* @test3_2(i32* nofree nonnull readnone dereferenceable(32) dereferenceable_or_null(32) "no-capture-maybe-returned" %0) +; ATTRIBUTOR: define nonnull dereferenceable(16) i32* @test3_2(i32* nofree nonnull readnone dereferenceable(32) "no-capture-maybe-returned" %0) %ret = getelementptr inbounds i32, i32* %0, i64 4 ret i32* %ret } @@ -202,3 +201,9 @@ define i32* @test_for_minus_index(i32* %p) { store i32 1, i32* %q ret i32* %q } + +define void @deref_or_null_and_nonnull(i32* dereferenceable_or_null(100) %0) { +; ATTRIBUTOR: define void @deref_or_null_and_nonnull(i32* nocapture nofree nonnull writeonly dereferenceable(100) %0) + store i32 1, i32* %0 + ret void +} diff --git a/llvm/test/Transforms/Attributor/dereferenceable-2.ll b/llvm/test/Transforms/Attributor/dereferenceable-2.ll new file mode 100644 index 0000000000000..b3c0440f930f4 --- /dev/null +++ b/llvm/test/Transforms/Attributor/dereferenceable-2.ll @@ -0,0 +1,356 @@ +; RUN: opt < %s -attributor --attributor-disable=false -S | FileCheck %s --check-prefix=ATTRIBUTOR +; Copied from Transforms/InferFunctionAttrs/dereferenceable.ll + +; Determine dereference-ability before unused loads get deleted: +; https://bugs.llvm.org/show_bug.cgi?id=21780 + +define <4 x double> @PR21780(double* %ptr) { +; ATTRIBUTOR-LABEL: @PR21780(double* nocapture nofree nonnull readonly align 8 dereferenceable(32) %ptr) + + ; GEP of index 0 is simplified away. + %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 1 + %arrayidx2 = getelementptr inbounds double, double* %ptr, i64 2 + %arrayidx3 = getelementptr inbounds double, double* %ptr, i64 3 + + %t0 = load double, double* %ptr, align 8 + %t1 = load double, double* %arrayidx1, align 8 + %t2 = load double, double* %arrayidx2, align 8 + %t3 = load double, double* %arrayidx3, align 8 + + %vecinit0 = insertelement <4 x double> undef, double %t0, i32 0 + %vecinit1 = insertelement <4 x double> %vecinit0, double %t1, i32 1 + %vecinit2 = insertelement <4 x double> %vecinit1, double %t2, i32 2 + %vecinit3 = insertelement <4 x double> %vecinit2, double %t3, i32 3 + %shuffle = shufflevector <4 x double> %vecinit3, <4 x double> %vecinit3, <4 x i32> + ret <4 x double> %shuffle +} + + +define double @PR21780_only_access3_with_inbounds(double* %ptr) { +; ATTRIBUTOR-LABEL: @PR21780_only_access3_with_inbounds(double* nocapture nofree nonnull readonly align 8 dereferenceable(32) %ptr) + + %arrayidx3 = getelementptr inbounds double, double* %ptr, i64 3 + %t3 = load double, double* %arrayidx3, align 8 + ret double %t3 +} + +define double @PR21780_only_access3_without_inbounds(double* %ptr) { +; ATTRIBUTOR-LABEL: @PR21780_only_access3_without_inbounds(double* nocapture nofree readonly align 8 %ptr) + %arrayidx3 = getelementptr double, double* %ptr, i64 3 + %t3 = load double, double* %arrayidx3, align 8 + ret double %t3 +} + +define double @PR21780_without_inbounds(double* %ptr) { +; ATTRIBUTOR-LABEL: @PR21780_without_inbounds(double* nocapture nofree nonnull readonly align 8 dereferenceable(32) %ptr) + + %arrayidx1 = getelementptr double, double* %ptr, i64 1 + %arrayidx2 = getelementptr double, double* %ptr, i64 2 + %arrayidx3 = getelementptr double, double* %ptr, i64 3 + + %t0 = load double, double* %ptr, align 8 + %t1 = load double, double* %arrayidx1, align 8 + %t2 = load double, double* %arrayidx2, align 8 + %t3 = load double, double* %arrayidx3, align 8 + + ret double %t3 +} + +; Unsimplified, but still valid. Also, throw in some bogus arguments. 
+ +define void @gep0(i8* %unused, i8* %other, i8* %ptr) { +; ATTRIBUTOR-LABEL: @gep0(i8* nocapture nofree readnone %unused, i8* nocapture nofree nonnull writeonly dereferenceable(1) %other, i8* nocapture nofree nonnull readonly dereferenceable(3) %ptr) + %arrayidx0 = getelementptr i8, i8* %ptr, i64 0 + %arrayidx1 = getelementptr i8, i8* %ptr, i64 1 + %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 + %t0 = load i8, i8* %arrayidx0 + %t1 = load i8, i8* %arrayidx1 + %t2 = load i8, i8* %arrayidx2 + store i8 %t2, i8* %other + ret void +} + +; Order of accesses does not change computation. +; Multiple arguments may be dereferenceable. + +define void @ordering(i8* %ptr1, i32* %ptr2) { +; ATTRIBUTOR-LABEL: @ordering(i8* nocapture nofree nonnull readonly dereferenceable(3) %ptr1, i32* nocapture nofree nonnull readonly dereferenceable(8) %ptr2) + %a20 = getelementptr i32, i32* %ptr2, i64 0 + %a12 = getelementptr i8, i8* %ptr1, i64 2 + %t12 = load i8, i8* %a12 + %a11 = getelementptr i8, i8* %ptr1, i64 1 + %t20 = load i32, i32* %a20 + %a10 = getelementptr i8, i8* %ptr1, i64 0 + %t10 = load i8, i8* %a10 + %t11 = load i8, i8* %a11 + %a21 = getelementptr i32, i32* %ptr2, i64 1 + %t21 = load i32, i32* %a21 + ret void +} + +; Not in entry block. + +define void @not_entry_but_guaranteed_to_execute(i8* %ptr) { +; ATTRIBUTOR-LABEL: @not_entry_but_guaranteed_to_execute(i8* nocapture nofree nonnull readonly dereferenceable(3) %ptr) +entry: + br label %exit +exit: + %arrayidx0 = getelementptr i8, i8* %ptr, i64 0 + %arrayidx1 = getelementptr i8, i8* %ptr, i64 1 + %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 + %t0 = load i8, i8* %arrayidx0 + %t1 = load i8, i8* %arrayidx1 + %t2 = load i8, i8* %arrayidx2 + ret void +} + +; Not in entry block and not guaranteed to execute. + +define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) { +; ATTRIBUTOR-LABEL: @not_entry_not_guaranteed_to_execute(i8* nocapture nofree readonly %ptr, i1 %cond) +entry: + br i1 %cond, label %loads, label %exit +loads: + %arrayidx0 = getelementptr i8, i8* %ptr, i64 0 + %arrayidx1 = getelementptr i8, i8* %ptr, i64 1 + %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 + %t0 = load i8, i8* %arrayidx0 + %t1 = load i8, i8* %arrayidx1 + %t2 = load i8, i8* %arrayidx2 + ret void +exit: + ret void +} + +; The last load may not execute, so dereferenceable bytes only cover the first two loads. + +define void @partial_in_entry(i16* %ptr, i1 %cond) { +; ATTRIBUTOR-LABEL: @partial_in_entry(i16* nocapture nofree nonnull readonly dereferenceable(4) %ptr, i1 %cond) +entry: + %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 + %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 + %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 + %t0 = load i16, i16* %arrayidx0 + %t1 = load i16, i16* %arrayidx1 + br i1 %cond, label %loads, label %exit +loads: + %t2 = load i16, i16* %arrayidx2 + ret void +exit: + ret void +} + +; The volatile load can't be used to prove a non-volatile access is allowed. +; The 2nd and 3rd loads may never execute. + +define void @volatile_is_not_dereferenceable(i16* %ptr) { +; ATTRIBUTOR-LABEL: @volatile_is_not_dereferenceable(i16* nofree %ptr) + %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 + %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 + %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 + %t0 = load volatile i16, i16* %arrayidx0 + %t1 = load i16, i16* %arrayidx1 + %t2 = load i16, i16* %arrayidx2 + ret void +} + +; TODO: We should allow inference for atomic (but not volatile) ops. 
+ +define void @atomic_is_alright(i16* %ptr) { +; ATTRIBUTOR-LABEL: @atomic_is_alright(i16* nocapture nofree nonnull readonly align 2 dereferenceable(6) %ptr) + %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 + %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 + %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 + %t0 = load atomic i16, i16* %arrayidx0 unordered, align 2 + %t1 = load i16, i16* %arrayidx1 + %t2 = load i16, i16* %arrayidx2 + ret void +} + +declare void @may_not_return() + +define void @not_guaranteed_to_transfer_execution(i16* %ptr) { +; ATTRIBUTOR-LABEL: @not_guaranteed_to_transfer_execution(i16* nocapture nonnull readonly dereferenceable(2) %ptr) + %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 + %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 + %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 + %t0 = load i16, i16* %arrayidx0 + call void @may_not_return() + %t1 = load i16, i16* %arrayidx1 + %t2 = load i16, i16* %arrayidx2 + ret void +} + +; We must have consecutive accesses. + +define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { +; ATTRIBUTOR-LABEL: @variable_gep_index(i8* nocapture nofree readnone %unused, i8* nocapture nofree nonnull readonly dereferenceable(1) %ptr, i64 %variable_index) + %arrayidx1 = getelementptr i8, i8* %ptr, i64 %variable_index + %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 + %t0 = load i8, i8* %ptr + %t1 = load i8, i8* %arrayidx1 + %t2 = load i8, i8* %arrayidx2 + ret void +} + +; Deal with >1 GEP index. + +define void @multi_index_gep(<4 x i8>* %ptr) { +; FIXME: %ptr should be dereferenceable(4) +; ATTRIBUTOR-LABEL: @multi_index_gep(<4 x i8>* nocapture nofree nonnull readonly dereferenceable(1) %ptr) + %arrayidx00 = getelementptr <4 x i8>, <4 x i8>* %ptr, i64 0, i64 0 + %t0 = load i8, i8* %arrayidx00 + ret void +} + +; Could round weird bitwidths down? + +define void @not_byte_multiple(i9* %ptr) { +; ATTRIBUTOR-LABEL: @not_byte_multiple(i9* nocapture nofree nonnull readonly dereferenceable(2) %ptr) + %arrayidx0 = getelementptr i9, i9* %ptr, i64 0 + %t0 = load i9, i9* %arrayidx0 + ret void +} + +; Missing direct access from the pointer. + +define void @no_pointer_deref(i16* %ptr) { +; ATTRIBUTOR-LABEL: @no_pointer_deref(i16* nocapture nofree readonly %ptr) + %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 + %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 + %t1 = load i16, i16* %arrayidx1 + %t2 = load i16, i16* %arrayidx2 + ret void +} + +; Out-of-order is OK, but a missing access ends the dereferenceable range. + +define void @non_consecutive(i32* %ptr) { +; ATTRIBUTOR-LABEL: @non_consecutive(i32* nocapture nofree nonnull readonly dereferenceable(8) %ptr) + %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 + %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 + %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 + %t1 = load i32, i32* %arrayidx1 + %t0 = load i32, i32* %arrayidx0 + %t3 = load i32, i32* %arrayidx3 + ret void +} + +; Improve on existing dereferenceable attribute. 
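+; (Exposition added during editing, not part of the original test: @more_bytes below is only annotated +; dereferenceable(8), but the four consecutive i32 loads at byte offsets 0-12 prove 16 accessible bytes, +; so the deduced attribute improves to dereferenceable(16), as its check line states.)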
+ +define void @more_bytes(i32* dereferenceable(8) %ptr) { +; ATTRIBUTOR-LABEL: @more_bytes(i32* nocapture nofree nonnull readonly dereferenceable(16) %ptr) + %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 + %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 + %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 + %arrayidx2 = getelementptr i32, i32* %ptr, i64 2 + %t3 = load i32, i32* %arrayidx3 + %t1 = load i32, i32* %arrayidx1 + %t2 = load i32, i32* %arrayidx2 + %t0 = load i32, i32* %arrayidx0 + ret void +} + +; Improve on existing dereferenceable_or_null attribute. + +define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { +; ATTRIBUTOR-LABEL: @more_bytes_and_not_null(i32* nocapture nofree nonnull readonly dereferenceable(16) %ptr) + %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 + %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 + %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 + %arrayidx2 = getelementptr i32, i32* %ptr, i64 2 + %t3 = load i32, i32* %arrayidx3 + %t1 = load i32, i32* %arrayidx1 + %t2 = load i32, i32* %arrayidx2 + %t0 = load i32, i32* %arrayidx0 + ret void +} + +; But don't pessimize existing dereferenceable attribute. + +define void @better_bytes(i32* dereferenceable(100) %ptr) { +; ATTRIBUTOR-LABEL: @better_bytes(i32* nocapture nofree nonnull readonly dereferenceable(100) %ptr) + %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 + %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 + %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 + %arrayidx2 = getelementptr i32, i32* %ptr, i64 2 + %t3 = load i32, i32* %arrayidx3 + %t1 = load i32, i32* %arrayidx1 + %t2 = load i32, i32* %arrayidx2 + %t0 = load i32, i32* %arrayidx0 + ret void +} + +define void @bitcast(i32* %arg) { +; ATTRIBUTOR-LABEL: @bitcast(i32* nocapture nofree nonnull readonly dereferenceable(8) %arg) + %ptr = bitcast i32* %arg to float* + %arrayidx0 = getelementptr float, float* %ptr, i64 0 + %arrayidx1 = getelementptr float, float* %ptr, i64 1 + %t0 = load float, float* %arrayidx0 + %t1 = load float, float* %arrayidx1 + ret void +} + +define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { +; ATTRIBUTOR-LABEL: @bitcast_different_sizes(double* nocapture nofree nonnull readonly dereferenceable(12) %arg1, i8* nocapture nofree nonnull readonly dereferenceable(16) %arg2) + %ptr1 = bitcast double* %arg1 to float* + %a10 = getelementptr float, float* %ptr1, i64 0 + %a11 = getelementptr float, float* %ptr1, i64 1 + %a12 = getelementptr float, float* %ptr1, i64 2 + %ld10 = load float, float* %a10 + %ld11 = load float, float* %a11 + %ld12 = load float, float* %a12 + + %ptr2 = bitcast i8* %arg2 to i64* + %a20 = getelementptr i64, i64* %ptr2, i64 0 + %a21 = getelementptr i64, i64* %ptr2, i64 1 + %ld20 = load i64, i64* %a20 + %ld21 = load i64, i64* %a21 + ret void +} + +define void @negative_offset(i32* %arg) { +; ATTRIBUTOR-LABEL: @negative_offset(i32* nocapture nofree nonnull readonly dereferenceable(4) %arg) + %ptr = bitcast i32* %arg to float* + %arrayidx0 = getelementptr float, float* %ptr, i64 0 + %arrayidx1 = getelementptr float, float* %ptr, i64 -1 + %t0 = load float, float* %arrayidx0 + %t1 = load float, float* %arrayidx1 + ret void +} + +define void @stores(i32* %arg) { +; ATTRIBUTOR-LABEL: @stores(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg) + %ptr = bitcast i32* %arg to float* + %arrayidx0 = getelementptr float, float* %ptr, i64 0 + %arrayidx1 = getelementptr float, float* %ptr, i64 1 + store float 1.0, float* %arrayidx0 + store float 2.0, float* %arrayidx1 + ret 
void +} + +define void @load_store(i32* %arg) { +; ATTRIBUTOR-LABEL: @load_store(i32* nocapture nofree nonnull dereferenceable(8) %arg) + %ptr = bitcast i32* %arg to float* + %arrayidx0 = getelementptr float, float* %ptr, i64 0 + %arrayidx1 = getelementptr float, float* %ptr, i64 1 + %t1 = load float, float* %arrayidx0 + store float 2.0, float* %arrayidx1 + ret void +} + +define void @different_size1(i32* %arg) { +; ATTRIBUTOR-LABEL: @different_size1(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg) + %arg-cast = bitcast i32* %arg to double* + store double 0.000000e+00, double* %arg-cast + store i32 0, i32* %arg + ret void +} + +define void @different_size2(i32* %arg) { +; ATTRIBUTOR-LABEL: @different_size2(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg) + store i32 0, i32* %arg + %arg-cast = bitcast i32* %arg to double* + store double 0.000000e+00, double* %arg-cast + ret void +} diff --git a/llvm/test/Transforms/FunctionAttrs/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/heap_to_stack.ll rename to llvm/test/Transforms/Attributor/heap_to_stack.ll diff --git a/llvm/test/Transforms/FunctionAttrs/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/internal-noalias.ll rename to llvm/test/Transforms/Attributor/internal-noalias.ll diff --git a/llvm/test/Transforms/FunctionAttrs/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/liveness.ll rename to llvm/test/Transforms/Attributor/liveness.ll diff --git a/llvm/test/Transforms/FunctionAttrs/misc.ll b/llvm/test/Transforms/Attributor/misc.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/misc.ll rename to llvm/test/Transforms/Attributor/misc.ll diff --git a/llvm/test/Transforms/FunctionAttrs/new_attributes.ll b/llvm/test/Transforms/Attributor/new_attributes.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/new_attributes.ll rename to llvm/test/Transforms/Attributor/new_attributes.ll diff --git a/llvm/test/Transforms/FunctionAttrs/noalias_returned.ll b/llvm/test/Transforms/Attributor/noalias.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/noalias_returned.ll rename to llvm/test/Transforms/Attributor/noalias.ll diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll new file mode 100644 index 0000000000000..abb148d883ed4 --- /dev/null +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -0,0 +1,346 @@ +; RUN: opt -attributor -attributor-manifest-internal -attributor-disable=false -S -attributor-annotate-decl-cs < %s | FileCheck %s --check-prefixes=ATTRIBUTOR +; RUN: opt -passes=attributor -attributor-manifest-internal -attributor-disable=false -S -attributor-annotate-decl-cs < %s | FileCheck %s --check-prefixes=ATTRIBUTOR +; Copied from Transforms/FunctionAttrs/nocapture.ll + +@g = global i32* null ; [#uses=1] + +; ATTRIBUTOR: define i32* @c1(i32* nofree readnone returned "no-capture-maybe-returned" %q) +define i32* @c1(i32* %q) { + ret i32* %q +} + +; ATTRIBUTOR: define void @c2(i32* nofree writeonly %q) +; It would also be acceptable to mark %q as readnone. Update @c3 too. 
+define void @c2(i32* %q) { + store i32* %q, i32** @g + ret void +} + +; ATTRIBUTOR: define void @c3(i32* nofree writeonly %q) +define void @c3(i32* %q) { + call void @c2(i32* %q) + ret void +} + +; ATTRIBUTOR: define i1 @c4(i32* nofree readnone %q, i32 %bitno) +define i1 @c4(i32* %q, i32 %bitno) { + %tmp = ptrtoint i32* %q to i32 + %tmp2 = lshr i32 %tmp, %bitno + %bit = trunc i32 %tmp2 to i1 + br i1 %bit, label %l1, label %l0 +l0: + ret i1 0 ; escaping value not caught by def-use chaining. +l1: + ret i1 1 ; escaping value not caught by def-use chaining. +} + +; c4b is c4 but without the escaping part +; ATTRIBUTOR: define i1 @c4b(i32* nocapture nofree readnone %q, i32 %bitno) +define i1 @c4b(i32* %q, i32 %bitno) { + %tmp = ptrtoint i32* %q to i32 + %tmp2 = lshr i32 %tmp, %bitno + %bit = trunc i32 %tmp2 to i1 + br i1 %bit, label %l1, label %l0 +l0: + ret i1 0 ; not escaping! +l1: + ret i1 0 ; not escaping! +} + +@lookup_table = global [2 x i1] [ i1 0, i1 1 ] + +; ATTRIBUTOR: define i1 @c5(i32* nofree readonly %q, i32 %bitno) +define i1 @c5(i32* %q, i32 %bitno) { + %tmp = ptrtoint i32* %q to i32 + %tmp2 = lshr i32 %tmp, %bitno + %bit = and i32 %tmp2, 1 + ; subtle escape mechanism follows + %lookup = getelementptr [2 x i1], [2 x i1]* @lookup_table, i32 0, i32 %bit + %val = load i1, i1* %lookup + ret i1 %val +} + +declare void @throw_if_bit_set(i8*, i8) readonly + +; ATTRIBUTOR: define i1 @c6(i8* readonly %q, i8 %bit) +define i1 @c6(i8* %q, i8 %bit) personality i32 (...)* @__gxx_personality_v0 { + invoke void @throw_if_bit_set(i8* %q, i8 %bit) + to label %ret0 unwind label %ret1 +ret0: + ret i1 0 +ret1: + %exn = landingpad {i8*, i32} + cleanup + ret i1 1 +} + +declare i32 @__gxx_personality_v0(...) + +define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind { + %tmp = ptrtoint i32* %q to i32 + %tmp2 = lshr i32 %tmp, %bitno + %bit = and i32 %tmp2, 1 + %lookup = getelementptr [2 x i1], [2 x i1]* @lookup_table, i32 0, i32 %bit + ret i1* %lookup +} + +; ATTRIBUTOR: define i1 @c7(i32* nofree readonly %q, i32 %bitno) +define i1 @c7(i32* %q, i32 %bitno) { + %ptr = call i1* @lookup_bit(i32* %q, i32 %bitno) + %val = load i1, i1* %ptr + ret i1 %val +} + + +; ATTRIBUTOR: define i32 @nc1(i32* nofree %q, i32* nocapture nofree %p, i1 %b) +define i32 @nc1(i32* %q, i32* %p, i1 %b) { +e: + br label %l +l: + %x = phi i32* [ %p, %e ] + %y = phi i32* [ %q, %e ] + %tmp = bitcast i32* %x to i32* ; [#uses=2] + %tmp2 = select i1 %b, i32* %tmp, i32* %y + %val = load i32, i32* %tmp2 ; [#uses=1] + store i32 0, i32* %tmp + store i32* %y, i32** @g + ret i32 %val +} + +; ATTRIBUTOR: define i32 @nc1_addrspace(i32* nofree %q, i32 addrspace(1)* nocapture nofree %p, i1 %b) +define i32 @nc1_addrspace(i32* %q, i32 addrspace(1)* %p, i1 %b) { +e: + br label %l +l: + %x = phi i32 addrspace(1)* [ %p, %e ] + %y = phi i32* [ %q, %e ] + %tmp = addrspacecast i32 addrspace(1)* %x to i32* ; [#uses=2] + %tmp2 = select i1 %b, i32* %tmp, i32* %y + %val = load i32, i32* %tmp2 ; [#uses=1] + store i32 0, i32* %tmp + store i32* %y, i32** @g + ret i32 %val +} + +; ATTRIBUTOR: define void @nc2(i32* nocapture nofree %p, i32* nofree %q) +define void @nc2(i32* %p, i32* %q) { + %1 = call i32 @nc1(i32* %q, i32* %p, i1 0) ; [#uses=0] + ret void +} + + +; ATTRIBUTOR: define void @nc3(void ()* nocapture nofree nonnull %p) +define void @nc3(void ()* %p) { + call void %p() + ret void +} + +declare void @external(i8*) readonly nounwind +; ATTRIBUTOR: define void @nc4(i8* nocapture readonly %p) +define void @nc4(i8* %p) { + call void @external(i8* %p) + 
ret void +} + +; ATTRIBUTOR: define void @nc5(void (i8*)* nocapture nofree nonnull %f, i8* nocapture %p) +define void @nc5(void (i8*)* %f, i8* %p) { + call void %f(i8* %p) readonly nounwind + call void %f(i8* nocapture %p) + ret void +} + +; ATTRIBUTOR: define void @test1_1(i8* nocapture nofree readnone %x1_1, i8* nocapture nofree readnone %y1_1, i1 %c) +; It would be acceptable to add readnone to %y1_1 and %y1_2. +define void @test1_1(i8* %x1_1, i8* %y1_1, i1 %c) { + call i8* @test1_2(i8* %x1_1, i8* %y1_1, i1 %c) + store i32* null, i32** @g + ret void +} + +; ATTRIBUTOR: define i8* @test1_2(i8* nocapture nofree readnone %x1_2, i8* nofree readnone returned "no-capture-maybe-returned" %y1_2, i1 %c) +define i8* @test1_2(i8* %x1_2, i8* %y1_2, i1 %c) { + br i1 %c, label %t, label %f +t: + call void @test1_1(i8* %x1_2, i8* %y1_2, i1 %c) + store i32* null, i32** @g + br label %f +f: + ret i8* %y1_2 +} + +; ATTRIBUTOR: define void @test2(i8* nocapture nofree readnone %x2) +define void @test2(i8* %x2) { + call void @test2(i8* %x2) + store i32* null, i32** @g + ret void +} + +; ATTRIBUTOR: define void @test3(i8* nocapture nofree readnone %x3, i8* nocapture nofree readnone %y3, i8* nocapture nofree readnone %z3) +define void @test3(i8* %x3, i8* %y3, i8* %z3) { + call void @test3(i8* %z3, i8* %y3, i8* %x3) + store i32* null, i32** @g + ret void +} + +; ATTRIBUTOR: define void @test4_1(i8* nocapture nofree readnone %x4_1, i1 %c) +define void @test4_1(i8* %x4_1, i1 %c) { + call i8* @test4_2(i8* %x4_1, i8* %x4_1, i8* %x4_1, i1 %c) + store i32* null, i32** @g + ret void +} + +; ATTRIBUTOR: define i8* @test4_2(i8* nocapture nofree readnone %x4_2, i8* nofree readnone returned "no-capture-maybe-returned" %y4_2, i8* nocapture nofree readnone %z4_2, i1 %c) +define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2, i1 %c) { + br i1 %c, label %t, label %f +t: + call void @test4_1(i8* null, i1 %c) + store i32* null, i32** @g + br label %f +f: + ret i8* %y4_2 +} + +declare i8* @test5_1(i8* %x5_1) + +; ATTRIBUTOR: define void @test5_2(i8* %x5_2) +define void @test5_2(i8* %x5_2) { + call i8* @test5_1(i8* %x5_2) + store i32* null, i32** @g + ret void +} + +declare void @test6_1(i8* %x6_1, i8* nocapture %y6_1, ...) + +; ATTRIBUTOR: define void @test6_2(i8* %x6_2, i8* nocapture %y6_2, i8* %z6_2) +define void @test6_2(i8* %x6_2, i8* %y6_2, i8* %z6_2) { + call void (i8*, i8*, ...) 
@test6_1(i8* %x6_2, i8* %y6_2, i8* %z6_2) + store i32* null, i32** @g + ret void +} + +; ATTRIBUTOR: define void @test_cmpxchg(i32* nocapture nofree nonnull dereferenceable(4) %p) +define void @test_cmpxchg(i32* %p) { + cmpxchg i32* %p, i32 0, i32 1 acquire monotonic + ret void +} + +; ATTRIBUTOR: define void @test_cmpxchg_ptr(i32** nocapture nofree nonnull dereferenceable(8) %p, i32* nofree %q) +define void @test_cmpxchg_ptr(i32** %p, i32* %q) { + cmpxchg i32** %p, i32* null, i32* %q acquire monotonic + ret void +} + +; ATTRIBUTOR: define void @test_atomicrmw(i32* nocapture nofree nonnull dereferenceable(4) %p) +define void @test_atomicrmw(i32* %p) { + atomicrmw add i32* %p, i32 1 seq_cst + ret void +} + +; ATTRIBUTOR: define void @test_volatile(i32* nofree align 4 %x) +define void @test_volatile(i32* %x) { +entry: + %gep = getelementptr i32, i32* %x, i64 1 + store volatile i32 0, i32* %gep, align 4 + ret void +} + +; ATTRIBUTOR: nocaptureLaunder(i8* nocapture %p) +define void @nocaptureLaunder(i8* %p) { +entry: + %b = call i8* @llvm.launder.invariant.group.p0i8(i8* %p) + store i8 42, i8* %b + ret void +} + +@g2 = global i8* null +; ATTRIBUTOR: define void @captureLaunder(i8* %p) +define void @captureLaunder(i8* %p) { + %b = call i8* @llvm.launder.invariant.group.p0i8(i8* %p) + store i8* %b, i8** @g2 + ret void +} + +; ATTRIBUTOR: @nocaptureStrip(i8* nocapture writeonly %p) +define void @nocaptureStrip(i8* %p) { +entry: + %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %p) + store i8 42, i8* %b + ret void +} + +@g3 = global i8* null +; ATTRIBUTOR: define void @captureStrip(i8* writeonly %p) +define void @captureStrip(i8* %p) { + %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %p) + store i8* %b, i8** @g3 + ret void +} + +; ATTRIBUTOR: define i1 @captureICmp(i32* nofree readnone %x) +define i1 @captureICmp(i32* %x) { + %1 = icmp eq i32* %x, null + ret i1 %1 +} + +; ATTRIBUTOR: define i1 @captureICmpRev(i32* nofree readnone %x) +define i1 @captureICmpRev(i32* %x) { + %1 = icmp eq i32* null, %x + ret i1 %1 +} + +; ATTRIBUTOR: define i1 @nocaptureInboundsGEPICmp(i32* nocapture nofree nonnull readnone %x) +define i1 @nocaptureInboundsGEPICmp(i32* %x) { + %1 = getelementptr inbounds i32, i32* %x, i32 5 + %2 = bitcast i32* %1 to i8* + %3 = icmp eq i8* %2, null + ret i1 %3 +} + +; ATTRIBUTOR: define i1 @nocaptureInboundsGEPICmpRev(i32* nocapture nofree nonnull readnone %x) +define i1 @nocaptureInboundsGEPICmpRev(i32* %x) { + %1 = getelementptr inbounds i32, i32* %x, i32 5 + %2 = bitcast i32* %1 to i8* + %3 = icmp eq i8* null, %2 + ret i1 %3 +} + +; ATTRIBUTOR: define i1 @nocaptureDereferenceableOrNullICmp(i32* nocapture nofree readnone dereferenceable_or_null(4) %x) +define i1 @nocaptureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) { + %1 = bitcast i32* %x to i8* + %2 = icmp eq i8* %1, null + ret i1 %2 +} + +; ATTRIBUTOR: define i1 @captureDereferenceableOrNullICmp(i32* nofree readnone dereferenceable_or_null(4) %x) +define i1 @captureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) "null-pointer-is-valid"="true" { + %1 = bitcast i32* %x to i8* + %2 = icmp eq i8* %1, null + ret i1 %2 +} + +declare void @unknown(i8*) +define void @test_callsite() { +entry: +; We know that 'null' in AS 0 does not alias anything and cannot be captured. Though the latter is currently derived rather than queried. 
+; ATTRIBUTOR: call void @unknown(i8* noalias null) + call void @unknown(i8* null) + ret void +} + +declare i8* @unknownpi8pi8(i8*,i8* returned) +define i8* @test_returned1(i8* %A, i8* returned %B) nounwind readonly { +; ATTRIBUTOR: define i8* @test_returned1(i8* nocapture readonly %A, i8* readonly returned %B) +entry: + %p = call i8* @unknownpi8pi8(i8* %A, i8* %B) + ret i8* %p +} + +define i8* @test_returned2(i8* %A, i8* %B) { +; ATTRIBUTOR: define i8* @test_returned2(i8* nocapture readonly %A, i8* readonly returned %B) +entry: + %p = call i8* @unknownpi8pi8(i8* %A, i8* %B) nounwind readonly + ret i8* %p +} + +declare i8* @llvm.launder.invariant.group.p0i8(i8*) +declare i8* @llvm.strip.invariant.group.p0i8(i8*) diff --git a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll b/llvm/test/Transforms/Attributor/nocapture-2.ll similarity index 99% rename from llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll rename to llvm/test/Transforms/Attributor/nocapture-2.ll index fa4d984e931b0..79075268ed410 100644 --- a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll +++ b/llvm/test/Transforms/Attributor/nocapture-2.ll @@ -260,8 +260,7 @@ entry: ; } ; ; There should *not* be a no-capture attribute on %a -; FIXME: %a should have align 8 -; CHECK: define nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree nonnull writeonly dereferenceable(16) "no-capture-maybe-returned" %a) +; CHECK: define nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree nonnull writeonly align 8 dereferenceable(16) "no-capture-maybe-returned" %a) define i64* @not_captured_but_returned_1(i64* %a) #0 { entry: %add.ptr = getelementptr inbounds i64, i64* %a, i64 1 diff --git a/llvm/test/Transforms/Attributor/nofree.ll b/llvm/test/Transforms/Attributor/nofree.ll new file mode 100644 index 0000000000000..d06a0ea1e9b08 --- /dev/null +++ b/llvm/test/Transforms/Attributor/nofree.ll @@ -0,0 +1,243 @@ +; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR +; Copied from Transforms/FunctionAttrs/nofree-attributor.ll + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Test cases specifically designed for the "nofree" function attribute. +; We use FIXMEs to indicate problems and missing attributes. + +; Free functions +declare void @free(i8* nocapture) local_unnamed_addr #1 +declare noalias i8* @realloc(i8* nocapture, i64) local_unnamed_addr #0 +declare void @_ZdaPv(i8*) local_unnamed_addr #2 + + +; TEST 1 (positive case) +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define void @only_return() +define void @only_return() #0 { + ret void +} + + +; TEST 2 (negative case) +; Only free +; void only_free(char* p) { +; free(p); +; } + +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable +; ATTRIBUTOR-NOT: nofree +; ATTRIBUTOR-NEXT: define void @only_free(i8* nocapture %0) local_unnamed_addr #1 +define void @only_free(i8* nocapture %0) local_unnamed_addr #0 { + tail call void @free(i8* %0) #1 + ret void +} + + +; TEST 3 (negative case) +; Free occurs in the same SCC. 
+; void free_in_scc1(char*p){ +; free_in_scc2(p); +; } +; void free_in_scc2(char*p){ +; free_in_scc1(p); +; free(p); +; } + + +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable +; ATTRIBUTOR-NOT: nofree +; ATTRIBUTOR-NEXT: define void @free_in_scc1(i8* nocapture %0) local_unnamed_addr +define void @free_in_scc1(i8* nocapture %0) local_unnamed_addr #0 { + tail call void @free_in_scc2(i8* %0) #1 + ret void +} + + +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable +; ATTRIBUTOR-NOT: nofree +; ATTRIBUTOR: define void @free_in_scc2(i8* nocapture %0) local_unnamed_addr +define void @free_in_scc2(i8* nocapture %0) local_unnamed_addr #0 { + %cmp = icmp eq i8* %0, null + br i1 %cmp, label %rec, label %call +call: + tail call void @free(i8* %0) #1 + br label %end +rec: + tail call void @free_in_scc1(i8* %0) + br label %end +end: + ret void +} + + +; TEST 4 (positive case) +; Free doesn't occur. +; void mutual_recursion1(){ +; mutual_recursion2(); +; } +; void mutual_recursion2(){ +; mutual_recursion1(); +; } + + +; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define void @mutual_recursion1() +define void @mutual_recursion1() #0 { + call void @mutual_recursion2() + ret void +} + +; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define void @mutual_recursion2() +define void @mutual_recursion2() #0 { + call void @mutual_recursion1() + ret void +} + + +; TEST 5 +; C++ delete operation (negative case) +; void delete_op (char p[]){ +; delete [] p; +; } + +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable +; ATTRIBUTOR-NOT: nofree +; ATTRIBUTOR-NEXT: define void @_Z9delete_opPc(i8* %0) local_unnamed_addr #1 +define void @_Z9delete_opPc(i8* %0) local_unnamed_addr #0 { + %2 = icmp eq i8* %0, null + br i1 %2, label %4, label %3 + +;